This allows passing the last failing test from the Windows HLK TPM 2.0
TCG PPI 1.3 test suite.
The interface is described in the "TCG Platform Reset Attack
Mitigation Specification", chapter 6 "ACPI _DSM Function". According
to Laszlo, it is not so easy to implement in OVMF, so he suggested
doing it in QEMU instead.
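
For illustration only (not part of this patch): a guest kernel could
drive the new function along the lines below, sketched with Linux's
acpi_evaluate_dsm(). The mor_set() helper, its handle argument and the
error handling are hypothetical.

    /* Hypothetical guest-side sketch: request a memory clear on the
     * next reboot via the Memory Clear _DSM (function index 1). */
    #include <linux/acpi.h>

    static const guid_t mem_clear_guid =
        GUID_INIT(0x376054ed, 0xcc13, 0x4675, 0x90, 0x1c,
                  0x47, 0x56, 0xd7, 0xf2, 0xd4, 0x5d);

    static int mor_set(acpi_handle handle, u64 value)
    {
        union acpi_object arg = {
            .integer = { .type = ACPI_TYPE_INTEGER, .value = value },
        };
        union acpi_object argv4 = {
            .package = { .type = ACPI_TYPE_PACKAGE, .count = 1,
                         .elements = &arg },
        };
        /* Arg1 revision = 1, Arg2 function index = 1 (set MOR value) */
        union acpi_object *out =
            acpi_evaluate_dsm(handle, &mem_clear_guid, 1, 1, &argv4);

        if (!out) {
            return -EINVAL;
        }
        /* The _DSM returns an Integer: 0 success, 1 general failure */
        ACPI_FREE(out);
        return 0;
    }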
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
hw/tpm/tpm_ppi.h | 2 ++
hw/i386/acpi-build.c | 46 ++++++++++++++++++++++++++++++++++++++++++++
hw/tpm/tpm_crb.c | 1 +
hw/tpm/tpm_ppi.c | 23 ++++++++++++++++++++++
hw/tpm/tpm_tis.c | 1 +
docs/specs/tpm.txt | 2 ++
hw/tpm/trace-events | 3 +++
7 files changed, 78 insertions(+)
diff --git a/hw/tpm/tpm_ppi.h b/hw/tpm/tpm_ppi.h
index f6458bf87e..3239751e9f 100644
--- a/hw/tpm/tpm_ppi.h
+++ b/hw/tpm/tpm_ppi.h
@@ -23,4 +23,6 @@ typedef struct TPMPPI {
bool tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m,
hwaddr addr, Object *obj, Error **errp);
+void tpm_ppi_reset(TPMPPI *tpmppi);
+
#endif /* TPM_TPM_PPI_H */
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index c5e9a6e11d..2ab3e8fae7 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1824,6 +1824,13 @@ build_tpm_ppi(TPMIf *tpm, Aml *dev)
pprq = aml_name("PPRQ");
pprm = aml_name("PPRM");
+ aml_append(dev,
+ aml_operation_region("TPP3", AML_SYSTEM_MEMORY,
+ aml_int(TPM_PPI_ADDR_BASE + 0x15a),
+ 0x1));
+ field = aml_field("TPP3", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
+ aml_append(field, aml_named_field("MOVV", 8));
+ aml_append(dev, field);
/*
* DerefOf in Windows is broken with SYSTEM_MEMORY. Use a dynamic
* operation region inside of a method for getting FUNC[op].
@@ -2166,7 +2173,46 @@ build_tpm_ppi(TPMIf *tpm, Aml *dev)
aml_append(ifctx, aml_return(aml_buffer(1, zerobyte)));
}
aml_append(method, ifctx);
+
+ ifctx = aml_if(
+ aml_equal(uuid,
+ aml_touuid("376054ED-CC13-4675-901C-4756D7F2D45D")));
+ {
+ /* standard DSM query function */
+ ifctx2 = aml_if(aml_equal(function, zero));
+ {
+ uint8_t byte_list[1] = { 0x03 };
+ aml_append(ifctx2, aml_return(aml_buffer(1, byte_list)));
+ }
+ aml_append(ifctx, ifctx2);
+
+ /*
+ * TCG Platform Reset Attack Mitigation Specification 1.0 Ch.6
+ *
+ * Arg 2 (Integer): Function Index = 1
+ * Arg 3 (Package): Arguments = Package: Type: Integer
+ * Operation Value of the Request
+ * Returns: Type: Integer
+ * 0: Success
+ * 1: General Failure
+ */
+ ifctx2 = aml_if(aml_equal(function, one));
+ {
+ aml_append(ifctx2,
+ aml_store(aml_derefof(aml_index(arguments, zero)),
+ op));
+ {
+ aml_append(ifctx2, aml_store(op, aml_name("MOVV")));
+
+ /* 0: success */
+ aml_append(ifctx2, aml_return(zero));
+ }
+ }
+ aml_append(ifctx, ifctx2);
+ }
+ aml_append(method, ifctx);
}
+
aml_append(dev, method);
}
diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index b243222fd6..48f6a716ad 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -233,6 +233,7 @@ static void tpm_crb_reset(void *dev)
{
CRBState *s = CRB(dev);
+ tpm_ppi_reset(&s->ppi);
tpm_backend_reset(s->tpmbe);
memset(s->regs, 0, sizeof(s->regs));
diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c
index 8b46b9dd4b..ce43bc5729 100644
--- a/hw/tpm/tpm_ppi.c
+++ b/hw/tpm/tpm_ppi.c
@@ -16,8 +16,30 @@
#include "qapi/error.h"
#include "cpu.h"
#include "sysemu/memory_mapping.h"
+#include "sysemu/reset.h"
#include "migration/vmstate.h"
#include "tpm_ppi.h"
+#include "trace.h"
+
+void tpm_ppi_reset(TPMPPI *tpmppi)
+{
+ char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
+
+ if (ptr[0x15a] & 0x1) {
+ GuestPhysBlockList guest_phys_blocks;
+ GuestPhysBlock *block;
+
+ guest_phys_blocks_init(&guest_phys_blocks);
+ guest_phys_blocks_append(&guest_phys_blocks);
+ QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) {
+ trace_tpm_ppi_memset(block->host_addr,
+ block->target_end - block->target_start);
+ memset(block->host_addr, 0,
+ block->target_end - block->target_start);
+ }
+ guest_phys_blocks_free(&guest_phys_blocks);
+ }
+}
bool tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m,
hwaddr addr, Object *obj, Error **errp)
@@ -27,5 +49,6 @@ bool tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m,
vmstate_register_ram(&tpmppi->ram, DEVICE(obj));
memory_region_add_subregion(m, addr, &tpmppi->ram);
+
return true;
}
diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 70432ffe8b..d9bfa956cc 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -868,6 +868,7 @@ static void tpm_tis_reset(DeviceState *dev)
s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver),
TPM_TIS_BUFFER_MAX);
+ tpm_ppi_reset(&s->ppi);
tpm_backend_reset(s->be_driver);
s->active_locty = TPM_TIS_NO_LOCALITY;
diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt
index 332c2ae597..ce9bda3c89 100644
--- a/docs/specs/tpm.txt
+++ b/docs/specs/tpm.txt
@@ -121,6 +121,8 @@ layout:
+----------+--------+--------+-------------------------------------------+
| next_step| 0x1 | 0x159 | Operation to execute after reboot by |
| | | | firmware. Used by firmware. |
+ +----------+--------+--------+-------------------------------------------+
+ | movv | 0x1 | 0x15a | Memory overwrite variable |
+----------+--------+--------+-------------------------------------------+
The following values are supported for the 'func' field. They correspond
diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events
index 25bee0cecf..920d32ad55 100644
--- a/hw/tpm/trace-events
+++ b/hw/tpm/trace-events
@@ -51,3 +51,6 @@ tpm_tis_mmio_write_init_abort(void) "Initiating abort"
tpm_tis_mmio_write_lowering_irq(void) "Lowering IRQ"
tpm_tis_mmio_write_data2send(uint32_t value, unsigned size) "Data to send to TPM: 0x%08x (size=%d)"
tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u"
+
+# hw/tpm/tpm_ppi.c
+tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu"
--
2.19.0.rc1
On Fri, 31 Aug 2018 19:24:24 +0200
Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
> [...]
> +void tpm_ppi_reset(TPMPPI *tpmppi)
> +{
> + char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
nvdimm seems to use cpu_physical_memory_read() to access guest-accessible
memory, so the question is: what's the difference?
> [...]
Hi
On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
>
> On Fri, 31 Aug 2018 19:24:24 +0200
> Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
>
> > [...]
> > +void tpm_ppi_reset(TPMPPI *tpmppi)
> > +{
>
>
> > + char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
> nvdimm seems to use cpu_physical_memory_read() to access guest-accessible
> memory, so the question is: what's the difference?
cpu_physical_memory_read() is higher level, doing dispatch on address
and length checks.
This is a bit unnecessary, as ppi->buf could be accessed directly.
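
A minimal sketch of the two access styles being compared (an
illustration assuming QEMU-internal code with tpmppi in scope, not
code from this patch):

    uint8_t mor;

    /* Generic API: dispatches the guest-physical address through the
     * memory map and checks the length before copying the byte out. */
    cpu_physical_memory_read(TPM_PPI_ADDR_BASE + 0x15a, &mor, sizeof(mor));

    /* Direct access: the PPI region is ordinary RAM owned by the
     * device, so its host backing store can be dereferenced directly. */
    uint8_t *buf = memory_region_get_ram_ptr(&tpmppi->ram);
    mor = buf[0x15a];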
> [...]
--
Marc-André Lureau
On Thu, 6 Sep 2018 07:50:09 +0400
Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> Hi
>
> On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > [...]
> > > +void tpm_ppi_reset(TPMPPI *tpmppi)
> > > +{
> >
> >
> > > + char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
> > nvdimm seems to use cpu_physical_memory_read() to access guest-accessible
> > memory, so the question is: what's the difference?
>
> cpu_physical_memory_read() is higher level, doing dispatch on address
> and length checks.
>
> This is a bit unnecessary, as ppi->buf could be accessed directly.
[...]
> > > + memset(block->host_addr, 0,
> > > + block->target_end - block->target_start);
> > > + }
my concern here is that if we touch guest memory directly,
we might get into trouble on migration without dirtying the
modified ranges.
PS:
feel free to ignore this, since I don't have a clue what I'm talking about :)
> > > [...]
Hi
On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
>
> On Thu, 6 Sep 2018 07:50:09 +0400
> Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
>
> > > > [...]
> > > > + memset(block->host_addr, 0,
> > > > + block->target_end - block->target_start);
> > > > + }
> my concern here is that if we touch guest memory directly,
> we might get into trouble on migration without dirtying the
> modified ranges.
It is a read-only access of one byte.
By the time the reset handler is called, the memory must already have
been migrated.
> [...]
--
Marc-André Lureau
* Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> Hi
>
> On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > [...]
> > my concern here is that if we touch guest memory directly,
> > we might get into trouble on migration without dirtying the
> > modified ranges.
>
> It is a read-only access of one byte.
> By the time the reset handler is called, the memory must already have
> been migrated.
Looks like a write to me?
Also, don't forget that a guest reset can happen during a migration.
Dave
> > [...]
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Hi
On Thu, Sep 6, 2018 at 12:59 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > [...]
> > > my concern here is that if we touch guest memory directly,
> > > we might get into trouble on migration without dirtying the
> > > modified ranges.
> >
> > It is a read-only access of one byte.
> > By the time the reset handler is called, the memory must already have
> > been migrated.
>
> Looks like a write to me?
the PPI RAM region is read for the "memory clear" byte.
The whole guest RAM is reset to 0 if it is set.
> Also, don't forget that a guest reset can happen during a migration.
Hmm, does cpu_physical_memory_read() guarantee the memory has been migrated?
Is there a way to wait for migration to be completed in a reset handler?
>
> Dave
> [...]
--
Marc-André Lureau
* Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> Hi
>
> On Thu, Sep 6, 2018 at 12:59 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > Hi
> > >
> > > On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > >
> > > > On Thu, 6 Sep 2018 07:50:09 +0400
> > > > Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> > > >
> > > > > Hi
> > > > >
> > > > > On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > > >
> > > > > > On Fri, 31 Aug 2018 19:24:24 +0200
> > > > > > Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
> > > > > >
> > > > > > > This allows to pass the last failing test from the Windows HLK TPM 2.0
> > > > > > > TCG PPI 1.3 tests.
> > > > > > >
> > > > > > > The interface is described in the "TCG Platform Reset Attack
> > > > > > > Mitigation Specification", chapter 6 "ACPI _DSM Function". According
> > > > > > > to Laszlo, it's not so easy to implement in OVMF, he suggested to do
> > > > > > > it in qemu instead.
> > > > > > >
> > > > > > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > > > > > ---
> > > > > > > hw/tpm/tpm_ppi.h | 2 ++
> > > > > > > hw/i386/acpi-build.c | 46 ++++++++++++++++++++++++++++++++++++++++++++
> > > > > > > [...]
> > > > > > > +void tpm_ppi_reset(TPMPPI *tpmppi)
> > > > > > > +{
> > > > > >
> > > > > >
> > > > > > > + char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
> > > > > > nvdimm seems to use cpu_physical_memory_read() to access guest
> > > > > > accessible memory, so the question is: what's the difference?
> > > > >
> > > > > cpu_physical_memory_read() is higher level, doing dispatch on address
> > > > > and length checks.
> > > > >
> > > > > This is a bit unnecessary, as ppi->buf could be accessed directly.
> > > > [...]
> > > > > > > + memset(block->host_addr, 0,
> > > > > > > + block->target_end - block->target_start);
> > > > > > > + }
> > > > my concern here is that if we directly touch guest memory here
> > > > we might get in trouble on migration without dirtying modified
> > > > ranges
> > >
> > > It is only a read of one byte.
> > > By the time the reset handler is called, the memory must have already
> > > been migrated.
> >
> > Looks like a write to me?
>
> the PPI RAM memory is read for the "memory clear" byte.
> The whole guest RAM is reset to 0 if that byte is set.
Oh, I see; hmm.
How do you avoid zeroing things like persistent memory? Or ROMs? Or EFI
pflash?
> > Also, don't forget that a guest reset can happen during a migration.
>
> Hmm, does cpu_physical_memory_read() guarantee the memory has been migrated?
> Is there a way to wait for migration to be completed in a reset handler?
No; remember that migration can take a significant amount of time (many
minutes) as you stuff many GB of RAM down a network.
So you can be in the situation where:
a) Migration starts
b) Migration sends a copy of most of RAM across
c) Guest dirties lots of RAM in parallel with b
d) migration sends some of the RAM again
e) guest reboots
f) migration keeps sending ram across
g) Migration finally completes and starts on destination
a-f are all happening on the source side as the guest is still running
and doing whatever it wants (including reboots).
Given something like acpi-build.c's acpi_ram_update's call to
memory_region_set_dirty, would that work for you?
Dave
> >
> > Dave
> >
> > > >
> > > > PS:
> > > > feel free to ignore since I don't have a clue what I'm talking about :)
> > > >
> > > > > > > + guest_phys_blocks_free(&guest_phys_blocks);
> > > > > > > + }
> > > > > > > +}
> > > > > > >
> > > > > > > [...]
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Hi
On Thu, Sep 6, 2018 at 1:42 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > Hi
> >
> > On Thu, Sep 6, 2018 at 12:59 PM Dr. David Alan Gilbert
> > <dgilbert@redhat.com> wrote:
> > >
> > > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > > Hi
> > > >
> > > > On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > >
> > > > > On Thu, 6 Sep 2018 07:50:09 +0400
> > > > > Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> > > > >
> > > > > > Hi
> > > > > >
> > > > > > On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > > > >
> > > > > > > On Fri, 31 Aug 2018 19:24:24 +0200
> > > > > > > Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
> > > > > > >
> > > > > > > > This allows to pass the last failing test from the Windows HLK TPM 2.0
> > > > > > > > TCG PPI 1.3 tests.
> > > > > > > >
> > > > > > > > The interface is described in the "TCG Platform Reset Attack
> > > > > > > > Mitigation Specification", chapter 6 "ACPI _DSM Function". According
> > > > > > > > to Laszlo, it's not so easy to implement in OVMF, he suggested to do
> > > > > > > > it in qemu instead.
> > > > > > > >
> > > > > > > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > > > > > > ---
> > > > > > > > hw/tpm/tpm_ppi.h | 2 ++
> > > > > > > > hw/i386/acpi-build.c | 46 ++++++++++++++++++++++++++++++++++++++++++++
> > > > > > > > hw/tpm/tpm_crb.c | 1 +
> > > > > > > > hw/tpm/tpm_ppi.c | 23 ++++++++++++++++++++++
> > > > > > > > hw/tpm/tpm_tis.c | 1 +
> > > > > > > > docs/specs/tpm.txt | 2 ++
> > > > > > > > hw/tpm/trace-events | 3 +++
> > > > > > > > 7 files changed, 78 insertions(+)
> > > > > > > >
> > > > > > > > diff --git a/hw/tpm/tpm_ppi.h b/hw/tpm/tpm_ppi.h
> > > > > > > > index f6458bf87e..3239751e9f 100644
> > > > > > > > --- a/hw/tpm/tpm_ppi.h
> > > > > > > > +++ b/hw/tpm/tpm_ppi.h
> > > > > > > > @@ -23,4 +23,6 @@ typedef struct TPMPPI {
> > > > > > > > bool tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m,
> > > > > > > > hwaddr addr, Object *obj, Error **errp);
> > > > > > > >
> > > > > > > > +void tpm_ppi_reset(TPMPPI *tpmppi);
> > > > > > > > +
> > > > > > > > #endif /* TPM_TPM_PPI_H */
> > > > > > > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > > > > > > index c5e9a6e11d..2ab3e8fae7 100644
> > > > > > > > --- a/hw/i386/acpi-build.c
> > > > > > > > +++ b/hw/i386/acpi-build.c
> > > > > > > > @@ -1824,6 +1824,13 @@ build_tpm_ppi(TPMIf *tpm, Aml *dev)
> > > > > > > > pprq = aml_name("PPRQ");
> > > > > > > > pprm = aml_name("PPRM");
> > > > > > > >
> > > > > > > > + aml_append(dev,
> > > > > > > > + aml_operation_region("TPP3", AML_SYSTEM_MEMORY,
> > > > > > > > + aml_int(TPM_PPI_ADDR_BASE + 0x15a),
> > > > > > > > + 0x1));
> > > > > > > > + field = aml_field("TPP3", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
> > > > > > > > + aml_append(field, aml_named_field("MOVV", 8));
> > > > > > > > + aml_append(dev, field);
> > > > > > > > /*
> > > > > > > > * DerefOf in Windows is broken with SYSTEM_MEMORY. Use a dynamic
> > > > > > > > * operation region inside of a method for getting FUNC[op].
> > > > > > > > @@ -2166,7 +2173,46 @@ build_tpm_ppi(TPMIf *tpm, Aml *dev)
> > > > > > > > aml_append(ifctx, aml_return(aml_buffer(1, zerobyte)));
> > > > > > > > }
> > > > > > > > aml_append(method, ifctx);
> > > > > > > > +
> > > > > > > > + ifctx = aml_if(
> > > > > > > > + aml_equal(uuid,
> > > > > > > > + aml_touuid("376054ED-CC13-4675-901C-4756D7F2D45D")));
> > > > > > > > + {
> > > > > > > > + /* standard DSM query function */
> > > > > > > > + ifctx2 = aml_if(aml_equal(function, zero));
> > > > > > > > + {
> > > > > > > > + uint8_t byte_list[1] = { 0x03 };
> > > > > > > > + aml_append(ifctx2, aml_return(aml_buffer(1, byte_list)));
> > > > > > > > + }
> > > > > > > > + aml_append(ifctx, ifctx2);
> > > > > > > > +
> > > > > > > > + /*
> > > > > > > > + * TCG Platform Reset Attack Mitigation Specification 1.0 Ch.6
> > > > > > > > + *
> > > > > > > > + * Arg 2 (Integer): Function Index = 1
> > > > > > > > + * Arg 3 (Package): Arguments = Package: Type: Integer
> > > > > > > > + * Operation Value of the Request
> > > > > > > > + * Returns: Type: Integer
> > > > > > > > + * 0: Success
> > > > > > > > + * 1: General Failure
> > > > > > > > + */
> > > > > > > > + ifctx2 = aml_if(aml_equal(function, one));
> > > > > > > > + {
> > > > > > > > + aml_append(ifctx2,
> > > > > > > > + aml_store(aml_derefof(aml_index(arguments, zero)),
> > > > > > > > + op));
> > > > > > > > + {
> > > > > > > > + aml_append(ifctx2, aml_store(op, aml_name("MOVV")));
> > > > > > > > +
> > > > > > > > + /* 0: success */
> > > > > > > > + aml_append(ifctx2, aml_return(zero));
> > > > > > > > + }
> > > > > > > > + }
> > > > > > > > + aml_append(ifctx, ifctx2);
> > > > > > > > + }
> > > > > > > > + aml_append(method, ifctx);
> > > > > > > > }
> > > > > > > > +
> > > > > > > > aml_append(dev, method);
> > > > > > > > }
> > > > > > > >
> > > > > > > > diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
> > > > > > > > index b243222fd6..48f6a716ad 100644
> > > > > > > > --- a/hw/tpm/tpm_crb.c
> > > > > > > > +++ b/hw/tpm/tpm_crb.c
> > > > > > > > @@ -233,6 +233,7 @@ static void tpm_crb_reset(void *dev)
> > > > > > > > {
> > > > > > > > CRBState *s = CRB(dev);
> > > > > > > >
> > > > > > > > + tpm_ppi_reset(&s->ppi);
> > > > > > > > tpm_backend_reset(s->tpmbe);
> > > > > > > >
> > > > > > > > memset(s->regs, 0, sizeof(s->regs));
> > > > > > > > diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c
> > > > > > > > index 8b46b9dd4b..ce43bc5729 100644
> > > > > > > > --- a/hw/tpm/tpm_ppi.c
> > > > > > > > +++ b/hw/tpm/tpm_ppi.c
> > > > > > > > @@ -16,8 +16,30 @@
> > > > > > > > #include "qapi/error.h"
> > > > > > > > #include "cpu.h"
> > > > > > > > #include "sysemu/memory_mapping.h"
> > > > > > > > +#include "sysemu/reset.h"
> > > > > > > > #include "migration/vmstate.h"
> > > > > > > > #include "tpm_ppi.h"
> > > > > > > > +#include "trace.h"
> > > > > > > > +
> > > > > > > > +void tpm_ppi_reset(TPMPPI *tpmppi)
> > > > > > > > +{
> > > > > > >
> > > > > > >
> > > > > > > > + char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
> > > > > > > nvdimm seems to use cpu_physical_memory_read() to access guest
> > > > > > > accessible memory, so question is what's difference?
> > > > > >
> > > > > > cpu_physical_memory_read() is higher level, doing dispatch on address
> > > > > > and length checks.
> > > > > >
> > > > > > This is a bit unnecessary, as ppi->buf could be accessed directly.
> > > > > [...]
> > > > > > > > + memset(block->host_addr, 0,
> > > > > > > > + block->target_end - block->target_start);
> > > > > > > > + }
> > > > > my concern here is that if we directly touch guest memory here
> > > > > we might get in trouble on migration without dirtying modified
> > > > > ranges
> > > >
> > > > It is only a read of one byte.
> > > > By the time the reset handler is called, the memory must have already
> > > > been migrated.
> > >
> > > Looks like a write to me?
> >
> > the PPI RAM memory is read for the "memory clear" byte.
> > The whole guest RAM is reset to 0 if that byte is set.
>
> Oh, I see; hmm.
> How do you avoid zeroing things like persistent memory? Or ROMs? Or EFI
> pflash?
guest_phys_blocks_append() only cares about RAM (see
guest_phys_blocks_region_add)
>
> > > Also, don't forget that a guest reset can happen during a migration.
> >
> > Hmm, does cpu_physical_memory_read() guarantee the memory has been migrated?
> > Is there a way to wait for migration to be completed in a reset handler?
>
> No; remember that migration can take a significant amount of time (many
> minutes) as you stuff many GB of RAM down a network.
>
> So you can be in the situation where:
> a) Migration starts
> b) Migration sends a copy of most of RAM across
> c) Guest dirties lots of RAM in parallel with b
> d) migration sends some of the RAM again
> e) guest reboots
> f) migration keeps sending ram across
> g) Migration finally completes and starts on destination
>
> a-f are all happening on the source side as the guest is still running
> and doing whatever it wants (including reboots).
>
> Given something like acpi-build.c's acpi_ram_update's call to
> memory_region_set_dirty, would that work for you?
after the memset(), it should then call:
memory_region_set_dirty(block->mr, 0, block->target_end - block->target_start);
looks about right?
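
To make it concrete, the whole reset handler would then look something
like this (untested sketch, reusing the names from this patch):

void tpm_ppi_reset(TPMPPI *tpmppi)
{
    char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);

    if (ptr[0x15a] & 0x1) {
        GuestPhysBlockList guest_phys_blocks;
        GuestPhysBlock *block;

        guest_phys_blocks_init(&guest_phys_blocks);
        guest_phys_blocks_append(&guest_phys_blocks);
        QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) {
            trace_tpm_ppi_memset(block->host_addr,
                                 block->target_end - block->target_start);
            memset(block->host_addr, 0,
                   block->target_end - block->target_start);
            /* tell migration the wiped range changed */
            memory_region_set_dirty(block->mr, 0,
                                    block->target_end - block->target_start);
        }
        guest_phys_blocks_free(&guest_phys_blocks);
    }
}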
thanks
> Dave
>
> > [...]
--
Marc-André Lureau
* Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> Hi
>
> On Thu, Sep 6, 2018 at 1:42 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > Hi
> > >
> > > On Thu, Sep 6, 2018 at 12:59 PM Dr. David Alan Gilbert
> > > <dgilbert@redhat.com> wrote:
> > > >
> > > > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > > > Hi
> > > > >
> > > > > On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, 6 Sep 2018 07:50:09 +0400
> > > > > > Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> > > > > >
> > > > > > > Hi
> > > > > > >
> > > > > > > On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Fri, 31 Aug 2018 19:24:24 +0200
> > > > > > > > Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
> > > > > > > >
> > > > > > > > > > [...]
> > > > > > > > > + memset(block->host_addr, 0,
> > > > > > > > > + block->target_end - block->target_start);
> > > > > > > > > + }
> > > > > > my concern here is that if we directly touch guest memory here
> > > > > > we might get in trouble on migration without dirtying modified
> > > > > > ranges
> > > > >
> > > > > It is only a read of one byte.
> > > > > By the time the reset handler is called, the memory must have already
> > > > > been migrated.
> > > >
> > > > Looks like a write to me?
> > >
> > > the PPI RAM memory is read for the "memory clear" byte.
> > > The whole guest RAM is reset to 0 if that byte is set.
> >
> > Oh, I see; hmm.
> > How do you avoid zeroing things like persistent memory? Or ROMs? Or EFI
> > pflash?
>
> guest_phys_blocks_append() only cares about RAM (see
> guest_phys_blocks_region_add)
Hmm, promising; it uses:
if (!memory_region_is_ram(section->mr) ||
memory_region_is_ram_device(section->mr)) {
return;
}
so ram_device is used by vfio and vhost-user; I don't see anything else.
pflash inits as a rom_device so that's probably OK.
But things like backends/hostmem-file.c just use
memory_region_init_ram_from_file even if they're shared or PMEM.
So, I think this would wipe an attached PMEM device - do you want to or
not?
> >
> > > > Also, don't forget that a guest reset can happen during a migration.
> > >
> > > Hmm, does cpu_physical_memory_read() guarantee the memory has been migrated?
> > > Is there a way to wait for migration to be completed in a reset handler?
> >
> > No; remember that migration can take a significant amount of time (many
> > minutes) as you stuff many GB of RAM down a network.
> >
> > So you can be in the situation where:
> > a) Migration starts
> > b) Migration sends a copy of most of RAM across
> > c) Guest dirties lots of RAM in parallel with b
> > d) migration sends some of the RAM again
> > e) guest reboots
> > f) migration keeps sending ram across
> > g) Migration finally completes and starts on destination
> >
> > a-f are all happening on the source side as the guest is still running
> > and doing whatever it wants (including reboots).
> >
> > Given something like acpi-build.c's acpi_ram_update's call to
> > memory_region_set_dirty, would that work for you?
>
> after the memset(), it should then call:
>
> memory_region_set_dirty(block->mr, 0, block->target_end - block->target_start);
>
> looks about right?
I think so.
Dave
>
> thanks
>
> > Dave
> >
> > [...]
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 09/06/18 19:23, Dr. David Alan Gilbert wrote:
> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
>> Hi
>>
>> On Thu, Sep 6, 2018 at 1:42 PM Dr. David Alan Gilbert
>> <dgilbert@redhat.com> wrote:
>>>
>>> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
>>>> Hi
>>>>
>>>> On Thu, Sep 6, 2018 at 12:59 PM Dr. David Alan Gilbert
>>>> <dgilbert@redhat.com> wrote:
>>>>>
>>>>> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
>>>>>> Hi
>>>>>>
>>>>>> On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
>>>>>>>
>>>>>>> On Thu, 6 Sep 2018 07:50:09 +0400
>>>>>>> Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
>>>>>>>
>>>>>>>> Hi
>>>>>>>>
>>>>>>>> On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
>>>>>>>>>
>>>>>>>>> On Fri, 31 Aug 2018 19:24:24 +0200
>>>>>>>>> Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
>>>>>>>>>> [...]
>>>>>>>>>> + memset(block->host_addr, 0,
>>>>>>>>>> + block->target_end - block->target_start);
>>>>>>>>>> + }
>>>>>>> my concern here is that if we directly touch guest memory here
>>>>>>> we might get in trouble on migration without dirtying modified
>>>>>>> ranges
>>>>>>
>>>>>> It is only a read of one byte.
>>>>>> By the time the reset handler is called, the memory must have already
>>>>>> been migrated.
>>>>>
>>>>> Looks like a write to me?
>>>>
>>>> the PPI RAM memory is read for the "memory clear" byte.
>>>> The whole guest RAM is reset to 0 if that byte is set.
>>>
>>> Oh, I see; hmm.
>>> How do you avoid zeroing things like persistent memory? Or ROMs? Or EFI
>>> pflash?
>>
>> guest_phys_blocks_append() only cares about RAM (see
>> guest_phys_blocks_region_add)
>
> Hmm, promising; it uses:
> if (!memory_region_is_ram(section->mr) ||
> memory_region_is_ram_device(section->mr)) {
> return;
> }
>
> so ram_device is used by vfio and vhost-user; I don't see anything else.
> pflash inits as a rom_device so that's probably OK.
> But things like backends/hostmem-file.c just use
> memory_region_init_ram_from_file even if they're shared or PMEM.
> So, I think this would wipe an attached PMEM device - do you want to or
> not?
I think that question could be put, "does the reset attack mitigation
spec recommend / require clearing persistent memory"? I've got no idea.
The reset attack is that the platform is re-set (forcibly, uncleanly)
while the original OS holds some secrets in memory, then the attacker's
OS (or firmware application) is loaded, and those scavenge the
leftovers. Would the original OS keep secrets in PMEM? I don't know.
I guess all address space that a guest OS could consider as "read-write
memory" should be wiped. I think guest_phys_blocks_append() is a good
choice here; originally I wrote that for supporting guest RAM dump; see
commit c5d7f60f06142. Note that the is_ram_device bit was added
separately, see commits e4dc3f5909ab9 and 21e00fa55f3fd -- but that's a
change in the "right" direction here, because it *restricts* what
regions qualify (for dumping, and here for clearing).
>
>>>
>>>>> Also, don't forget that a guest reset can happen during a migration.
>>>>
>>>> Hmm, does cpu_physical_memory_read() guarantee the memory has been migrated?
>>>> Is there a way to wait for migration to be completed in a reset handler?
>>>
>>> [...]
>>>
>>> Given something like acpi-build.c's acpi_ram_update's call to
>>> memory_region_set_dirty, would that work for you?
>>
>> after the memset(), it should then call:
>>
>> memory_region_set_dirty(block->mr, 0, block->target_end - block->target_start);
>>
>> looks about right?
>
> I think so.
I'll admit I can't follow the dirtying discussion. But, I'm reminded of
another commit of Dave's, namely 90c647db8d59 ("Fix pflash migration",
2016-04-15). Would the same trick apply here?
(
BTW, I'm sorry about not having followed this series closely -- I feel
bad that we can't solve this within the firmware, but we really can't.
The issue is that this memory clearing would have to occur really early
into the firmware, at the latest in the PEI phase.
However, both the 32-bit and the 64-bit variants of OVMF's PEI phase
have access only to the lowest 4GB of the memory address space. Mapping
all RAM (even with a "sliding bank") for clearing it would be a real
mess. To that, add the fact that OVMF's PEI phase executes from RAM (not
from pflash -- in OVMF, only SEC executes from pflash, and it
decompresses the PEI + DXE firmware volumes to RAM), so PEI would have
to clear all RAM *except* the areas its own self occupies, such as code,
global variables (static data), heap, stack, other processor data
structures (IDT, GDT, page tables, ...). And, no gaps inside those
should be left out either, because the previous OS might have left
secrets there...
This is actually easier for firmware that runs on physical hardware; for
two reasons. First, on physical hardware, the PEI phase of the firmware
runs from pflash (it might use CAR, Cache-As-RAM, for a small temporary
r/w storage), so it doesn't have to worry about scribbling over itself.
Second, on physical hardware, the memory controller too has to be booted
up -- the PEI code that does this is all vendors' most closely guarded
secret, and *never* open source -- and when the firmware massages
chipset registers for that, it can use non-architected means to get the
RAM to clear itself.
In comparison, in QEMU/KVM guests, the RAM just *is*. While that's a
huge relief most of the time, in this case, the fact that the RAM can
start out as nonzero, is a big problem. Hence my plea to implement the
feature in QEMU.
)
Thanks,
Laszlo
* Laszlo Ersek (lersek@redhat.com) wrote:
> On 09/06/18 19:23, Dr. David Alan Gilbert wrote:
> > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> >> Hi
> >>
> >> On Thu, Sep 6, 2018 at 1:42 PM Dr. David Alan Gilbert
> >> <dgilbert@redhat.com> wrote:
> >>>
> >>> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> >>>> Hi
> >>>>
> >>>> On Thu, Sep 6, 2018 at 12:59 PM Dr. David Alan Gilbert
> >>>> <dgilbert@redhat.com> wrote:
> >>>>>
> >>>>> * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> >>>>>> Hi
> >>>>>>
> >>>>>> On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
> >>>>>>>
> >>>>>>> On Thu, 6 Sep 2018 07:50:09 +0400
> >>>>>>> Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> >>>>>>>
> >>>>>>>> Hi
> >>>>>>>>
> >>>>>>>> On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
> >>>>>>>>>
> >>>>>>>>> On Fri, 31 Aug 2018 19:24:24 +0200
> >>>>>>>>> Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
> >>>>>>>>>> [...]
> >>>>>>>>>> + memset(block->host_addr, 0,
> >>>>>>>>>> + block->target_end - block->target_start);
> >>>>>>>>>> + }
> >>>>>>> my concern here is that if we directly touch guest memory here
> >>>>>>> we might get in trouble on migration without dirtying modified
> >>>>>>> ranges
> >>>>>>
> >>>>>> It is only a read of one byte.
> >>>>>> By the time the reset handler is called, the memory must have already
> >>>>>> been migrated.
> >>>>>
> >>>>> Looks like a write to me?
> >>>>
> >>>> the PPI RAM memory is read for the "memory clear" byte.
> >>>> The whole guest RAM is reset to 0 if that byte is set.
> >>>
> >>> Oh, I see; hmm.
> >>> How do you avoid zeroing things like persistent memory? Or ROMs? Or EFI
> >>> pflash?
> >>
> >> guest_phys_blocks_append() only cares about RAM (see
> >> guest_phys_blocks_region_add)
> >
> > Hmm, promising; it uses:
> > if (!memory_region_is_ram(section->mr) ||
> > memory_region_is_ram_device(section->mr)) {
> > return;
> > }
> >
> > so ram_device is used by vfio and vhost-user; I don't see anything else.
> > pflash inits as a rom_device so that's probably OK.
> > But things like backends/hostmem-file.c just use
> > memory_region_init_ram_from_file even if they're shared or PMEM.
> > So, I think this would wipe an attached PMEM device - do you want to or
> > not?
>
> I think that question could be put, "does the reset attack mitigation
> spec recommend / require clearing persistent memory"? I've got no idea.
> The reset attack is that the platform is re-set (forcibly, uncleanly)
> while the original OS holds some secrets in memory, then the attacker's
> OS (or firmware application) is loaded, and those scavenge the
> leftovers. Would the original OS keep secrets in PMEM? I don't know.
No, I don't know either; and I think part of the answer might depend on
what PMEM is being used for; if it's being used as actual storage with
a filesystem or database on it, you probably don't want to clear it - I
mean that's what the (P)ersistence is for.
> I guess all address space that a guest OS could consider as "read-write
> memory" should be wiped. I think guest_phys_blocks_append() is a good
> choice here; originally I wrote that for supporting guest RAM dump; see
> commit c5d7f60f06142. Note that the is_ram_device bit was added
> separately, see commits e4dc3f5909ab9 and 21e00fa55f3fd -- but that's a
> change in the "right" direction here, because it *restricts* what
> regions qualify (for dumping, and here for clearing).
Yep, I'm wondering if we should add a check for the pmem flag at the
same place.
Having said that, I don't think that's a question for this patch series;
if we agree that guest_phys_blocks* is the right thing to use then it's
a separate question about adding the pmem check there.
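For illustration, the check I have in mind would be something like this
(hypothetical sketch; assumes ramblock_is_pmem() can be applied to
section->mr->ram_block at that point):

/* hypothetical: would guest_phys_blocks_region_add() keep this region? */
static bool guest_phys_block_wipeable(MemoryRegionSection *section)
{
    return memory_region_is_ram(section->mr) &&
           !memory_region_is_ram_device(section->mr) &&
           !ramblock_is_pmem(section->mr->ram_block);
}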
(I didn't know about guest_phys_block* and would have probably just used
qemu_ram_foreach_block )
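which, writing the iterator callback from memory (untested), would have
been roughly:

static int wipe_block(const char *block_name, void *host_addr,
                      ram_addr_t offset, ram_addr_t length, void *opaque)
{
    /* unlike the guest_phys_blocks_* path, nothing here can skip
     * ROM/device-backed blocks */
    memset(host_addr, 0, length);
    return 0;
}

called as qemu_ram_foreach_block(wipe_block, NULL) - which is another
reason guest_phys_block* looks like the better fit.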
Dave
> [...]
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Hi

On Mon, Sep 10, 2018 at 2:44 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> (I didn't know about guest_phys_block* and would have probably just used
> qemu_ram_foreach_block )

guest_phys_block*() seems to fit, as it lists only the blocks actually
used, and already skips the device RAM.

Laszlo, you wrote the functions
(https://git.qemu.org/?p=qemu.git;a=commit;h=c5d7f60f0614250bd925071e25220ce5958f75d0),
do you think it's appropriate to list the memory to clear, or should we
rather use qemu_ram_foreach_block()?

thanks

--
Marc-André Lureau
+Alex, due to mention of 21e00fa55f3fd

On 09/10/18 15:03, Marc-André Lureau wrote:
> Hi
>
> On Mon, Sep 10, 2018 at 2:44 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
>> (I didn't know about guest_phys_block* and would have probably just used
>> qemu_ram_foreach_block )
>
> guest_phys_block*() seems to fit, as it lists only the blocks actually
> used, and already skips the device RAM.
>
> Laszlo, you wrote the functions
> (https://git.qemu.org/?p=qemu.git;a=commit;h=c5d7f60f0614250bd925071e25220ce5958f75d0),
> do you think it's appropriate to list the memory to clear, or should we
> rather use qemu_ram_foreach_block()?

Originally, I would have said, "use either, doesn't matter". Namely,
when I introduced the guest_phys_block*() functions, the original
purpose was not related to RAM *contents*, but to RAM *addresses*
(GPAs). This is evident if you look at the direct child commit of
c5d7f60f0614, namely 56c4bfb3f07f, which put GuestPhysBlockList to use.
And, for your use case (= wiping RAM), GPAs don't matter, only contents
matter.

However, with the commits I mentioned previously, namely e4dc3f5909ab9
and 21e00fa55f3fd, we now filter out some RAM blocks from the dumping
based on contents / backing as well. I think? So I believe we should
honor that for the wiping too. I guess I'd (vaguely) suggest using
guest_phys_block*().

(And then, as Dave suggests, maybe extend the filter to consider pmem
too, separately.)

Laszlo
Hi

On Tue, Sep 11, 2018 at 6:19 PM Laszlo Ersek <lersek@redhat.com> wrote:
>
> [...]
>
> However, with the commits I mentioned previously, namely e4dc3f5909ab9
> and 21e00fa55f3fd, we now filter out some RAM blocks from the dumping
> based on contents / backing as well. I think? So I believe we should
> honor that for the wiping too. I guess I'd (vaguely) suggest using
> guest_phys_block*().
>
> (And then, as Dave suggests, maybe extend the filter to consider pmem
> too, separately.)

I looked a bit into skipping pmem memory. The issue is that RAMBlock
and MemoryRegion have no idea they are actually used for nvram (you
could rely on the hostmem.pmem flag, but this is optional), and I don't
see a clear way to figure this out.

I can imagine retrieving the MemoryRegion from the guest phys address,
then checking whether the owner is TYPE_NVDIMM, for example. Is this a
good solution?

There is memory_region_from_host(); is there a memory_region_from_guest()?

thanks

--
Marc-André Lureau
* Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> Hi
>
> On Tue, Sep 11, 2018 at 6:19 PM Laszlo Ersek <lersek@redhat.com> wrote:
> >
> > +Alex, due to mention of 21e00fa55f3fd
> >
> > On 09/10/18 15:03, Marc-André Lureau wrote:
> > > Hi
> > >
> > > On Mon, Sep 10, 2018 at 2:44 PM Dr. David Alan Gilbert
> > > <dgilbert@redhat.com> wrote:
> > >> (I didn't know about guest_phys_block* and would have probably just used
> > >> qemu_ram_foreach_block )
> > >>
> > >
> > > guest_phys_block*() seems to fit, as it lists only the blocks actually
> > > used, and already skips the device RAM.
> > >
> > > Laszlo, you wrote the functions
> > > (https://git.qemu.org/?p=qemu.git;a=commit;h=c5d7f60f0614250bd925071e25220ce5958f75d0),
> > > do you think it's appropriate to list the memory to clear, or should we
> > > rather use qemu_ram_foreach_block()?
> >
> > Originally, I would have said, "use either, doesn't matter". Namely,
> > when I introduced the guest_phys_block*() functions, the original
> > purpose was not related to RAM *contents*, but to RAM *addresses*
> > (GPAs). This is evident if you look at the direct child commit of
> > c5d7f60f0614, namely 56c4bfb3f07f, which put GuestPhysBlockList to use.
> > And, for your use case (= wiping RAM), GPAs don't matter, only contents
> > matter.
> >
> > However, with the commits I mentioned previously, namely e4dc3f5909ab9
> > and 21e00fa55f3fd, we now filter out some RAM blocks from the dumping
> > based on contents / backing as well. I think? So I believe we should
> > honor that for the wiping too. I guess I'd (vaguely) suggest using
> > guest_phys_block*().
> >
> > (And then, as Dave suggests, maybe extend the filter to consider pmem
> > too, separately.)
>
> I looked a bit into skipping pmem memory. The issue is that RamBlock
> and MemoryRegion have no idea they are actually used for nvram (you
> could rely on hostmem.pmem flag, but this is optional), and I don't
> see a clear way to figure this out.
I think the pmem flag is what we should use; the problem though is we
have two different pieces of semantics:
a) PMEM - needs special flush instruction/calls
b) PMEM - my data is persistent, please don't clear me
Do those always go together?
(Copying in Junyan He who added the RAM_PMEM flag)
> I can imagine retrieving the MemoryRegion from the guest phys address,
> then checking that the owner is TYPE_NVDIMM, for example. Is this a
> good solution?
No, I think it's up to whatever creates the region to set a flag
somewhere properly - there's no telling whether it'll always be NVDIMM
or some other object.
Dave
> There is memory_region_from_host(), is there a memory_region_from_guest() ?
>
> thanks
>
>
> --
> Marc-André Lureau
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
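For reference, a sketch of what filtering on the existing flag would look
like; ramblock_is_pmem() is the real accessor for RAM_PMEM, while the
helper itself, and how the wipe loop obtains the RAMBlock, are glossed
over here:

    #include "qemu/osdep.h"
    #include "exec/cpu-common.h"

    /* Illustration only: clear one block of guest RAM unless its RAMBlock
     * carries RAM_PMEM -- i.e. treat the existing flag as semantics (b),
     * which is exactly the conflation being questioned above. */
    static void wipe_block_unless_pmem(RAMBlock *rb, void *host, size_t len)
    {
        if (rb && ramblock_is_pmem(rb)) {
            return;             /* (b): data is persistent, don't clear */
        }
        memset(host, 0, len);
    }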
Hi

On Tue, Sep 18, 2018 at 7:49 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> [...]
> I think the pmem flag is what we should use; the problem though is we

That would be much simpler. But what if you set up an nvdimm backend
with non-pmem memory? Will it always be cleared? What about platforms
that do not support libpmem?

> have two different pieces of semantics:
>  a) PMEM - needs special flush instruction/calls
>  b) PMEM - my data is persistent, please don't clear me
>
> Do those always go together?
>
> (Copying in Junyan He who added the RAM_PMEM flag)
>
> > I can imagine retrieving the MemoryRegion from the guest phys address,
> > then checking that the owner is TYPE_NVDIMM, for example. Is this a
> > good solution?
>
> No, I think it's up to whatever creates the region to set a flag
> somewhere properly - there's no telling whether it'll always be NVDIMM
> or some other object.

We could make the owner object set a flag on the MemoryRegion, or
implement a common NV interface.

thanks

--
Marc-André Lureau
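A sketch of what such a common NV interface might look like as a QOM
interface; every name below is invented, it only makes the proposal
concrete (OBJECT_GET_CLASS and InterfaceClass are the real QOM pieces):

    #include "qemu/osdep.h"
    #include "qom/object.h"

    /* Hypothetical QOM interface: a memory-owning device implements this
     * to declare that its contents are persistent. Not an existing type. */
    #define TYPE_NV_MEMORY "nv-memory"

    #define NV_MEMORY_GET_CLASS(obj) \
        OBJECT_GET_CLASS(NVMemoryClass, (obj), TYPE_NV_MEMORY)

    typedef struct NVMemoryClass {
        InterfaceClass parent_class;

        /* return true if the device's contents must survive a wipe */
        bool (*is_persistent)(Object *obj);
    } NVMemoryClass;

    /* the wipe loop could then test something like:
     *   if (object_dynamic_cast(owner, TYPE_NV_MEMORY)) { skip the block; }
     */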
Add zhangyi

-----Original Message-----
From: Marc-André Lureau [mailto:marcandre.lureau@gmail.com]
Sent: Wednesday, September 19, 2018 6:30 PM
To: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: He, Junyan <junyan.he@intel.com>; Laszlo Ersek <lersek@redhat.com>; Eduardo Habkost <ehabkost@redhat.com>; Michael S. Tsirkin <mst@redhat.com>; Stefan Berger <stefanb@linux.vnet.ibm.com>; QEMU <qemu-devel@nongnu.org>; Paolo Bonzini <pbonzini@redhat.com>; Igor Mammedov <imammedo@redhat.com>; Richard Henderson <rth@twiddle.net>; alex.williamson@redhat.com
Subject: Re: [Qemu-devel] [PATCH v10 6/6] tpm: add ACPI memory clear interface

[...]
* Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> Hi
>
> [...]
> > I think the pmem flag is what we should use; the problem though is we
>
> That would be much simpler. But what if you set up an nvdimm backend
> with non-pmem memory? Will it always be cleared? What about platforms
> that do not support libpmem?

Right, that's basically the problem I describe below, the difference
between (a) and (b).

> > have two different pieces of semantics:
> >  a) PMEM - needs special flush instruction/calls
> >  b) PMEM - my data is persistent, please don't clear me
> >
> > Do those always go together?
> >
> > (Copying in Junyan He who added the RAM_PMEM flag)
> >
> > > I can imagine retrieving the MemoryRegion from the guest phys address,
> > > then checking that the owner is TYPE_NVDIMM, for example. Is this a
> > > good solution?
> >
> > No, I think it's up to whatever creates the region to set a flag
> > somewhere properly - there's no telling whether it'll always be NVDIMM
> > or some other object.
>
> We could make the owner object set a flag on the MemoryRegion, or
> implement a common NV interface.

I think preferably on the RAMBlock, but yes; the question then is
whether we need to split the PMEM flag into two for (a) and (b) above
and whether they need to be separately set.

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, 19 Sep 2018 11:58:22 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> [...]
> > We could make the owner object set a flag on the MemoryRegion, or
> > implement a common NV interface.
>
> I think preferably on the RAMBlock, but yes; the question then is
> whether we need to split the PMEM flag into two for (a) and (b) above
> and whether they need to be separately set.

Well, I read the current pmem flag semantics as "the backend supports
persistent memory"; whether the frontend will use it or not is a
different story.

Perhaps we need a separate flag which says that pmem is in use, but
what about the use case where an nvdimm is used as RAM and not storage?
There we would probably like to wipe it out like normal RAM.

Maybe we should add a frontend property to allow selecting the policy
per device, which could then be propagated to the MemoryRegion/RAMBlock?
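A sketch of the per-device policy idea; DEFINE_PROP_BOOL is the real qdev
property macro and NVDIMMDevice the real frontend type, while the
"clear-on-reset" property name and the clear_on_reset field are invented:

    #include "qemu/osdep.h"
    #include "hw/qdev-properties.h"
    #include "hw/mem/nvdimm.h"

    /* Hypothetical per-device policy, as suggested above: a frontend
     * property that the device would propagate down to its
     * MemoryRegion/RAMBlock at realize time. */
    static Property nvdimm_properties[] = {
        DEFINE_PROP_BOOL("clear-on-reset", NVDIMMDevice, clear_on_reset,
                         false),
        DEFINE_PROP_END_OF_LIST(),
    };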
* Igor Mammedov (imammedo@redhat.com) wrote:
> [...]
> Well, I read the current pmem flag semantics as "the backend supports
> persistent memory"; whether the frontend will use it or not is a
> different story.
>
> Perhaps we need a separate flag which says that pmem is in use, but
> what about the use case where an nvdimm is used as RAM and not storage?
> There we would probably like to wipe it out like normal RAM.

Right, so we're slowly building up the number of flags/policies we're
trying to represent here; it's certainly not the one 'pmem' flag we
currently have.

> Maybe we should add a frontend property to allow selecting the policy
> per device, which could then be propagated to the MemoryRegion/RAMBlock?

By 'device' here you mean memory-backend-* objects? If so then yes I'd
agree a property on those propagated to the underlying RAMBlock would
be ideal.

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, 19 Sep 2018 13:14:05 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> [...]
> > Maybe we should add a frontend property to allow selecting the policy
> > per device, which could then be propagated to the MemoryRegion/RAMBlock?
>
> By 'device' here you mean memory-backend-* objects? If so then yes I'd
> agree a property on those propagated to the underlying RAMBlock would
> be ideal.

nope, 'device' is frontend i.e. pc-dimm, nvdimm, whatever comes next ...
it can mark a backend/MemoryRegion as 'clear on request' when it's realized(),
(which has nothing to do with pmem or backends).

I wonder if NVDIMM spec has a means to express it,
or TCG spec (they should have some thoughts on how to deal with
coming problem as it affects bare-metal as well)
* Igor Mammedov (imammedo@redhat.com) wrote:
> [...]
> nope, 'device' is frontend i.e. pc-dimm, nvdimm, whatever comes next ...
> it can mark a backend/MemoryRegion as 'clear on request' when it's realized(),
> (which has nothing to do with pmem or backends).

Why do you want to do it on the dimm; I thought the behaviour was more
associated with the backing device.
I worry about that because there's lots of places you can wire a backing
device up, for example I tend to do -object memory-backend-*....
-numa node, rather than using a dimm device, you'd have to go and add
the option to all of those DIMMs etc.

> I wonder if NVDIMM spec has a means to express it,
> or TCG spec (they should have some thoughts on how to deal with
> coming problem as it affects bare-metal as well)

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
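For illustration, the two ways of wiring the same backend that Dave
mentions - straight into a NUMA node versus through an nvdimm frontend,
the latter along the lines of docs/nvdimm.txt (paths, ids and sizes are
placeholders; pmem=on requires a QEMU built with libpmem support):

    # backend wired straight into a NUMA node (no dimm device involved)
    qemu-system-x86_64 ... \
      -object memory-backend-file,id=mem0,size=4G,mem-path=/dev/dax0.0,pmem=on \
      -numa node,memdev=mem0

    # the same kind of backend exposed through the nvdimm frontend
    qemu-system-x86_64 -machine pc,nvdimm -m 4G,slots=2,maxmem=8G ... \
      -object memory-backend-file,id=mem1,size=4G,mem-path=/dev/dax0.0,pmem=on \
      -device nvdimm,id=nvdimm1,memdev=mem1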
On Wed, Sep 19, 2018 at 08:15:25PM +0100, Dr. David Alan Gilbert wrote:
> [...]
> Why do you want to do it on the dimm; I thought the behaviour was more
> associated with the backing device.
> I worry about that because there's lots of places you can wire a backing
> device up, for example I tend to do -object memory-backend-*....
> -numa node, rather than using a dimm device, you'd have to go and add
> the option to all of those DIMMs etc.

I'm a bit confused by the details here, but it's important that
we get the frontend/backend distinction correctly.

I'm still reading this thread and trying to understand what
exactly is/isn't guest-visible here. Anything that is
guest-visible is supposed to be a frontend option (especially if
it's a setting that isn't allowed to change during migration)
Backend options are never supposed to affect guest ABI.

Which is the case here?

--
Eduardo
* Eduardo Habkost (ehabkost@redhat.com) wrote:
> On Wed, Sep 19, 2018 at 08:15:25PM +0100, Dr. David Alan Gilbert wrote:
> > [...]
> > > nope, 'device' is frontend i.e. pc-dimm, nvdimm, whatever comes next ...
> > > it can mark a backend/MemoryRegion as 'clear on request' when it's realized(),
> > > (which has nothing to do with pmem or backends).
> >
> > Why do you want to do it on the dimm; I thought the behaviour was more
> > associated with the backing device.
> > I worry about that because there's lots of places you can wire a backing
> > device up, for example I tend to do -object memory-backend-*....
> > -numa node, rather than using a dimm device, you'd have to go and add
> > the option to all of those DIMMs etc.
>
> I'm a bit confused by the details here, but it's important that
> we get the frontend/backend distinction correctly.
>
> I'm still reading this thread and trying to understand what
> exactly is/isn't guest-visible here. Anything that is
> guest-visible is supposed to be a frontend option (especially if
> it's a setting that isn't allowed to change during migration)
> Backend options are never supposed to affect guest ABI.
That's a fair argument.
> Which is the case here?
From what I can see, the current RAM_PMEM/is_pmem flags are purely used
by QEMU to decide if it should call pmem_* library calls to persist
things - so that's a host-visible feature only.
docs/nvdimm.txt seems to cover a lot of the detail here.
We've got quite a lot of places in the code that do:
  if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
and that's what seems to drive the ACPI building - see hw/acpi/nvdimm.c,
so I guess that's most of the guest-visible stuff.
and for DIMMs we have a MEMORY_DEVICE_INFO_KIND_NVDIMM flag.
Then we also have a flag for the CPU/machine type stating the persistence
mode (either cpu or mem-ctrl, I think, which sets some ACPI state to the
magic values 2 or 3 - see pc_machine_set_nvdimm_persistence). But that's
guest-visible state, and global, not about specific ram chunks.
So I think you're right that RAM_PMEM is the wrong thing to use here -
it shouldn't change the guest-visible behaviour of whether the RAM is
cleared; at the moment the only guest-visible thing seems to be the
TYPE_NVDIMM check, but that's not passed down to the memory regions.
So should it just be that the nvdimm frontend sets the flag saying that
a memory region/ramblock is exposed to the guest as nvdimm? And that
should make the difference here?
(cc'ing in Pankaj because of virtio-pmem).
Dave
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
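A sketch of Dave's closing suggestion - the nvdimm frontend marking its
RAM as guest-visible NVDIMM at realize time; qemu_ram_set_guest_nvdimm()
is an invented helper name, shown only to locate where the flag would be
set:

    #include "qemu/osdep.h"
    #include "exec/memory.h"

    /* Hypothetical helper: flag a RAMBlock as exposed to the guest as an
     * NVDIMM, so a reset-time wipe can skip it. Not an existing API. */
    void qemu_ram_set_guest_nvdimm(RAMBlock *rb, bool on);

    /* Called from the nvdimm frontend's realize, with the MemoryRegion
     * it exposes to the guest. */
    static void nvdimm_mark_guest_region(MemoryRegion *mr)
    {
        qemu_ram_set_guest_nvdimm(mr->ram_block, true);
    }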
On Thu, 20 Sep 2018 10:20:49 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Eduardo Habkost (ehabkost@redhat.com) wrote:
> > On Wed, Sep 19, 2018 at 08:15:25PM +0100, Dr. David Alan Gilbert wrote:
> > > * Igor Mammedov (imammedo@redhat.com) wrote:
> > > > On Wed, 19 Sep 2018 13:14:05 +0100
> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > >
> > > > > * Igor Mammedov (imammedo@redhat.com) wrote:
> > > > > > On Wed, 19 Sep 2018 11:58:22 +0100
> > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > >
> > > > > > > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > > > > > > Hi
> > > > > > > >
> > > > > > > > On Tue, Sep 18, 2018 at 7:49 PM Dr. David Alan Gilbert
> > > > > > > > <dgilbert@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > * Marc-André Lureau (marcandre.lureau@gmail.com) wrote:
> > > > > > > > > > Hi
> > > > > > > > > >
> > > > > > > > > > On Tue, Sep 11, 2018 at 6:19 PM Laszlo Ersek <lersek@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > +Alex, due to mention of 21e00fa55f3fd
> > > > > > > > > > >
> > > > > > > > > > > On 09/10/18 15:03, Marc-André Lureau wrote:
> > > > > > > > > > > > Hi
> > > > > > > > > > > >
> > > > > > > > > > > > On Mon, Sep 10, 2018 at 2:44 PM Dr. David Alan Gilbert
> > > > > > > > > > > > <dgilbert@redhat.com> wrote:
> > > > > > > > > > > >> (I didn't know about guest_phys_block* and would have probably just used
> > > > > > > > > > > >> qemu_ram_foreach_block )
> > > > > > > > > > > >>
> > > > > > > > > > > >
> > > > > > > > > > > > guest_phys_block*() seems to fit, as it lists only the blocks actually
> > > > > > > > > > > > used, and already skip the device RAM.
> > > > > > > > > > > >
> > > > > > > > > > > > Laszlo, you wrote the functions
> > > > > > > > > > > > (https://git.qemu.org/?p=qemu.git;a=commit;h=c5d7f60f0614250bd925071e25220ce5958f75d0),
> > > > > > > > > > > > do you think it's appropriate to list the memory to clear, or we
> > > > > > > > > > > > should rather use qemu_ram_foreach_block() ?
> > > > > > > > > > >
> > > > > > > > > > > Originally, I would have said, "use either, doesn't matter". Namely,
> > > > > > > > > > > when I introduced the guest_phys_block*() functions, the original
> > > > > > > > > > > purpose was not related to RAM *contents*, but to RAM *addresses*
> > > > > > > > > > > (GPAs). This is evident if you look at the direct child commit of
> > > > > > > > > > > c5d7f60f0614, namely 56c4bfb3f07f, which put GuestPhysBlockList to use.
> > > > > > > > > > > And, for your use case (= wiping RAM), GPAs don't matter, only contents
> > > > > > > > > > > matter.
> > > > > > > > > > >
> > > > > > > > > > > However, with the commits I mentioned previously, namely e4dc3f5909ab9
> > > > > > > > > > > and 21e00fa55f3fd, we now filter out some RAM blocks from the dumping
> > > > > > > > > > > based on contents / backing as well. I think? So I believe we should
> > > > > > > > > > > honor that for the wiping to. I guess I'd (vaguely) suggest using
> > > > > > > > > > > guest_phys_block*().
> > > > > > > > > > >
> > > > > > > > > > > (And then, as Dave suggests, maybe extend the filter to consider pmem
> > > > > > > > > > > too, separately.)
> > > > > > > > > >
> > > > > > > > > > I looked a bit into skipping pmem memory. The issue is that RamBlock
> > > > > > > > > > and MemoryRegion have no idea they are actually used for nvram (you
> > > > > > > > > > could rely on hostmem.pmem flag, but this is optional), and I don't
> > > > > > > > > > see a clear way to figure this out.
> > > > > > > > >
> > > > > > > > > I think the pmem flag is what we should use; the problem though is we
> > > > > > > >
> > > > > > > > That would be much simpler. But what if you set up an nvdimm backend
> > > > > > > > with non-pmem memory? Will it always be cleared? What about platforms
> > > > > > > > that do not support libpmem?
> > > > > > >
> > > > > > > Right, that's basically the problem I say below, the difference between
> > > > > > > (a) and (b).
> > > > > > >
> > > > > > > > > have two different pieces of semantics:
> > > > > > > > > a) PMEM - needs special flush instruction/calls
> > > > > > > > > b) PMEM - my data is persistent, please don't clear me
> > > > > > > > >
> > > > > > > > > Do those always go together?
> > > > > > > > >
> > > > > > > > > (Copying in Junyan He who added the RAM_PMEM flag)
> > > > > > > > >
> > > > > > > > > > I can imagine retrieving the MemoryRegion from the guest phys address,
> > > > > > > > > > then checking whether the owner is TYPE_NVDIMM, for example. Is this a good
> > > > > > > > > > solution?
> > > > > > > > >
> > > > > > > > > No, I think it's up to whatever creates the region to set a flag
> > > > > > > > > somewhere properly - there's no telling whether it'll always be NVDIMM
> > > > > > > > > or some other object.
> > > > > > > >
> > > > > > > > We could make the owner object set a flag on the MemoryRegion, or
> > > > > > > > implement a common NV interface.
> > > > > > >
> > > > > > > I think preferably on the RAMBlock, but yes; the question then is
> > > > > > > whether we need to split the PMEM flag into two for (a) and (b) above
> > > > > > > and whether they need to be separately set.
> > > > > > well,
> > > > > > I read the current pmem flag semantics as "the backend supports persistent
> > > > > > memory"; whether the frontend will use it or not is a different story.
> > > > > >
> > > > > > Perhaps we need a separate flag which says that pmem is in use,
> > > > > > but what about the use case where an nvdimm is used as RAM and not storage?
> > > > > > We would probably like to wipe it like normal RAM.
> > > > >
> > > > > Right, so we're slowly building up the number of flags/policies we're
> > > > > trying to represent here; it's certainly not the one 'pmem' flag we
> > > > > currently have.
> > > > >
> > > > > > Maybe we should add a frontend property to allow selecting the policy per
> > > > > > device, which could then be propagated to MemoryRegion/RAMBlock?
> > > > >
> > > > > By 'device' here you mean memory-backend-* objects? If so then yes I'd
> > > > > agree a property on those propagated to the underlying RAMBlock would
> > > > > be ideal.
> > > > nope, 'device' is the frontend, i.e. pc-dimm, nvdimm, whatever comes next ...
> > > > it can mark a backend/MemoryRegion as 'clear on request' when it is realized()
> > > > (which has nothing to do with pmem or backends).
> > >
> > > Why do you want to do it on the dimm? I thought the behaviour was more
> > > associated with the backing device.
> > > I worry about that because there are lots of places you can wire a backing
> > > device up; for example I tend to do -object memory-backend-*...
> > > -numa node rather than using a dimm device, so you'd have to go and add
> > > the option to all of those DIMMs etc.
That's just RAM guest-wise, regardless of backend, so it must be wiped out.
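
For reference, the two wirings being contrasted look roughly like this
on the command line (sketches in the style of docs/nvdimm.txt; paths
and sizes are placeholders):

    # backend behind an nvdimm frontend: the guest sees an NVDIMM
    qemu-system-x86_64 -machine pc,nvdimm=on -m 4G,slots=2,maxmem=8G \
        -object memory-backend-file,id=mem1,share=on,mem-path=/dev/dax0.0,size=1G,pmem=on \
        -device nvdimm,id=nvdimm1,memdev=mem1

    # same backend type behind a NUMA node: the guest sees plain RAM
    qemu-system-x86_64 -m 1G \
        -object memory-backend-file,id=mem2,share=on,mem-path=/tmp/backing,size=1G \
        -numa node,memdev=mem2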
> >
> > I'm a bit confused by the details here, but it's important that
> > we get the frontend/backend distinction right.
> >
> > I'm still reading this thread and trying to understand what
> > exactly is/isn't guest-visible here. Anything that is
> > guest-visible is supposed to be a frontend option (especially if
> > it's a setting that isn't allowed to change during migration).
> > Backend options are never supposed to affect guest ABI.
>
> That's a fair argument.
>
> > Which is the case here?
>
> From what I can see, the current RAM_PMEM/is_pmem flags are used purely by
> QEMU to decide whether it should make pmem_* library calls to persist
> things - so that's a host-visible feature only.
>
> docs/nvdimm.txt seems to cover a lot of the detail here.
>
> We've got quite a lot of places in the code that do:
> if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
>
> and that's what seems to drive the ACPI building - see hw/acpi/nvdimm.c
> so I guess that's most of the guest-visible stuff.
>
> and for DIMMs we have a MEMORY_DEVICE_INFO_KIND_NVDIMM flag.
>
> Then we also have a flag on the machine type stating the persistence
> mode (either cpu or mem-ctrl, I think, which sets some ACPI state to the
> magic value 2 or 3 - see pc_machine_set_nvdimm_persistence). But that's
> guest-visible state, and global, not about specific RAM chunks.
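
(For reference, that mode is selected on the command line, e.g.
"-machine pc,nvdimm=on,nvdimm-persistence=mem-ctrl"; "cpu" is the other
accepted value.)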
>
> So I think you're right, that RAM_PMEM is the wrong thing to use here -
> it shouldn't change the guest visible behaviour of whether the RAM is
> cleared; at the moment the only thing seems to be the TYPE_NVDIMM
> but that's not passed down to the memory regions.
>
> So should it just be that the nvdimm frontend sets the flag saying that
> a memory region/ramblock is exposed to the guest as nvdimm? And that
> should make the difference here?
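
A hypothetical sketch of that suggestion; the "nonvolatile" flag and
both accessors below are assumptions for illustration, not an existing
QEMU API at the time of this thread:

    /* Hypothetical: the nvdimm frontend marks the region it exposes to
     * the guest when it is realized. */
    memory_region_set_nonvolatile(mr, true);

    /* Hypothetical: the wiping loop then skips such blocks. */
    QTAILQ_FOREACH(block, &list.head, next) {
        if (memory_region_is_nonvolatile(block->mr)) {
            continue;   /* guest-persistent contents, leave alone */
        }
        memset(block->host_addr, 0,
               block->target_end - block->target_start);
    }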
Well, I'm also starting to lose track of what we are trying to figure out here.
Should we start anew and sum it up?
* we have a TPM feature 'clear memory on reboot'
* it's typically the firmware that implements it; however, it would
  be hard to implement in OVMF, so this patch tries to implement
  it in QEMU by going over RAMBlocks and clearing them, thinking that would
  be easier; alas, that turns out not quite true, as:
  we noticed that there are nvdimms and went off on a tangent discussing
  the backend's 'pmem' option, which is not visible to guests, could be set
  or not set, and the backend could be used by other frontends (as RAM).
Regardless of where the feature should be implemented,
I think we should first figure out (ask the TCG spec folks) what to do with
nvdimms in the first place, or try to get hold of a platform that supports
both and copy the behavior from there.
Once we know what to do with it, we can discuss the details.
>
>
> (cc'ing in Pankaj because of virtio-pmem).
>
> Dave
> > > > > I wonder if the NVDIMM spec has a means to express it,
> > > > > or the TCG spec (they should have some thoughts on how to deal with
> > > > > the coming problem, as it affects bare-metal as well)
On 09/20/18 15:20, Igor Mammedov wrote:
> On Thu, 20 Sep 2018 10:20:49 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> [...]
>
> Well, I'm also starting to lose track of what we are trying to figure out here.
> Should we start anew and sum it up?
>
> * we have a TPM feature 'clear memory on reboot'
> * it's typically the firmware that implements it; however, it would
>   be hard to implement in OVMF, so this patch tries to implement
>   it in QEMU by going over RAMBlocks and clearing them, thinking that would
>   be easier; alas, that turns out not quite true, as:
(QEMU now faces a design question. OVMF would face the same design
question *plus* a huge implementation mess. To my eyes, the relative
difference in difficulty persists.)
>   we noticed that there are nvdimms and went off on a tangent discussing
>   the backend's 'pmem' option, which is not visible to guests, could be set
>   or not set, and the backend could be used by other frontends (as RAM).
>
> Regardless of where the feature should be implemented,
> I think we should first figure out (ask the TCG spec folks) what to do with
> nvdimms in the first place, or try to get hold of a platform that supports
> both and copy the behavior from there.
> Once we know what to do with it, we can discuss the details.
Agreed 100%.
Thanks
Laszlo
* Igor Mammedov (imammedo@redhat.com) wrote:
> On Thu, 20 Sep 2018 10:20:49 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> [...]
>
> Well, I'm also starting to lose track of what we are trying to figure out here.
> Should we start anew and sum it up?
>
> * we have a TPM feature 'clear memory on reboot'
> * it's typically the firmware that implements it; however, it would
>   be hard to implement in OVMF, so this patch tries to implement
>   it in QEMU by going over RAMBlocks and clearing them, thinking that would
>   be easier; alas, that turns out not quite true, as:
>
>   we noticed that there are nvdimms and went off on a tangent discussing
>   the backend's 'pmem' option, which is not visible to guests, could be set
>   or not set, and the backend could be used by other frontends (as RAM).
>
> Regardless of where the feature should be implemented,
> I think we should first figure out (ask the TCG spec folks) what to do with
> nvdimms in the first place, or try to get hold of a platform that supports
> both and copy the behavior from there.
> Once we know what to do with it, we can discuss the details.
Yeah, agreed.
Dave
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Hi

On Wed, Sep 19, 2018 at 4:14 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> [...]
>
> > Maybe we should add a frontend property to allow selecting the policy per
> > device, which could then be propagated to MemoryRegion/RAMBlock?
>
> By 'device' here you mean memory-backend-* objects? If so then yes I'd
> agree a property on those propagated to the underlying RAMBlock would
> be ideal.

Earlier I was trying to suggest that the guest-exposed device / owner
(dimm/nvdimm etc.) would be the one setting the flag. I can work on a
patch; that should make it clearer.

--
Marc-André Lureau
On Wed, 19 Sep 2018 18:36:14 +0400
Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> [...]
>
> Earlier I was trying to suggest that the guest-exposed device / owner
> (dimm/nvdimm etc.) would be the one setting the flag. I can work on a
> patch; that should make it clearer.

The follow-up questions would be:
 - how the user/libvirt would pick which devices could be cleared and
   which could not
 - how the guest OS would know which is which
On 2018-09-19 at 14:29:57 +0400, Marc-André Lureau wrote:
> [...]
>
> > I think the pmem flag is what we should use; the problem though is we
>
> That would be much simpler. But what if you set up an nvdimm backend
> with non-pmem memory? Will it always be cleared? What about platforms
> that do not support libpmem?

My understanding is that you want to use the pmem=on flag to guarantee
data persistence even with a non-pmem backend?

> > have two different pieces of semantics:
> > a) PMEM - needs special flush instruction/calls
> > b) PMEM - my data is persistent, please don't clear me
> >
> > Do those always go together?

I don't think we need to split the flag into pmem=nvdimm / pmem=other.

For a) "needs special flush instruction/calls":
we could fall back to an empty function while we don't have libpmem
support. If you have libpmem support but the backend is not pmem, the
flush will just return when passed a non-pmem virtual address.

For b) "my data is persistent, please don't clear me":
that is what we both need.

Ah, there is logic that makes QEMU exit when it detects pmem=on && no
libpmem support; it should be improved if you really want this flag.
Correct me if I misunderstood something.
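
A minimal sketch of the fallback pattern described above, assuming
QEMU's CONFIG_LIBPMEM build flag and libpmem's pmem_memcpy_persist();
the nv_memcpy() wrapper name is made up:

    #include <string.h>

    #ifdef CONFIG_LIBPMEM
    #include <libpmem.h>

    /* With libpmem: copy, then flush to the persistence domain. */
    static void *nv_memcpy(void *dst, const void *src, size_t len)
    {
        return pmem_memcpy_persist(dst, src, len);
    }
    #else
    /* Without libpmem: plain copy; no explicit flush is available. */
    static void *nv_memcpy(void *dst, const void *src, size_t len)
    {
        return memcpy(dst, src, len);
    }
    #endif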
On Thu, 6 Sep 2018 12:01:40 +0400
Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> Hi
>
> On Thu, Sep 6, 2018 at 11:58 AM Igor Mammedov <imammedo@redhat.com> wrote:
> >
> > On Thu, 6 Sep 2018 07:50:09 +0400
> > Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> >
> > > Hi
> > >
> > > On Tue, Sep 4, 2018 at 10:47 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > >
> > > > On Fri, 31 Aug 2018 19:24:24 +0200
> > > > Marc-André Lureau <marcandre.lureau@redhat.com> wrote:
> > > >
> > > > > [...]
> > > > >
> > > > > +void tpm_ppi_reset(TPMPPI *tpmppi)
> > > > > +{
> > > >
> > > >
> > > > > + char *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
> > > > nvdimm seems to use cpu_physical_memory_read() to access
> > > > guest-accessible memory, so the question is: what's the difference?
> > >
> > > cpu_physical_memory_read() is higher level, doing the dispatch on the
> > > address plus length checks.
> > >
> > > This is a bit unnecessary, as ppi->buf could be accessed directly.
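
Side by side, the two access paths being compared (sketch only, reusing
the PPI region and TPM_PPI_ADDR_BASE from the patch):

    uint8_t val;

    /* Higher level: full address-space dispatch plus length checks. */
    cpu_physical_memory_read(TPM_PPI_ADDR_BASE + 0x15a, &val, 1);

    /* Direct: the device owns the region, so it can read the backing
     * RAM pointer without any dispatch. */
    uint8_t *ptr = memory_region_get_ram_ptr(&tpmppi->ram);
    val = ptr[0x15a];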
> > [...]
> > > > > + memset(block->host_addr, 0,
> > > > > + block->target_end - block->target_start);
> > > > > + }
> > my concern here is that if we touch guest memory directly here,
> > we might get into trouble on migration without dirtying the modified
> > ranges
>
> It is a read of only one byte.
> By the time the reset handler is called, the memory must already
> have been migrated.
What about memset()? It looks like we are writing to guest memory.
Shouldn't we dirty the zeroed-out blocks?
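
One way to address this, assuming each GuestPhysBlock carries its
MemoryRegion in the "mr" member, would be to mark the zeroed range
dirty right after the memset:

    memset(block->host_addr, 0,
           block->target_end - block->target_start);
    /* Keep migration (and anything else watching the dirty bitmap)
     * consistent with the direct write above. */
    memory_region_set_dirty(block->mr, 0,
                            block->target_end - block->target_start);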
> >
> > PS:
> > feel free to ignore this, since I don't have a clue what I'm talking about :)
> >
> > > > > + guest_phys_blocks_free(&guest_phys_blocks);
> > > > > + }
> > > > > +}
> > > > > [...]