[PATCH v2] TCG plugin API extension to read guest memory content by an address

Mikhail Tyutin posted 1 patch 1 year, 2 months ago
Failed in applying to current master (apply log)
contrib/plugins/Makefile     |  1 +
contrib/plugins/memtrace.c   | 76 ++++++++++++++++++++++++++++++++++++
include/qemu/qemu-plugin.h   | 18 ++++++++-
plugins/api.c                | 16 ++++++++
plugins/qemu-plugins.symbols |  1 +
5 files changed, 111 insertions(+), 1 deletion(-)
create mode 100644 contrib/plugins/memtrace.c
[PATCH v2] TCG plugin API extension to read guest memory content by an address
Posted by Mikhail Tyutin 1 year, 2 months ago
TCG plugin API extension to read guest memory content. qemu_plugin_vcpu_read_phys_mem()
function can be used by TCG plugin inside of qemu_plugin_vcpu_mem_cb_t callback to adjust
received address according to internal memory mappings and read content of guest memory.
Works for both user-level and system-level emulation modes.

What's new in v2:
* Increment QEMU_PLUGIN_VERSION instead of custom define
* Example of qemu_plugin_vcpu_read_phys_mem() usage in memtrace.c
* Use cpu_memory_rw_debug() to get memory content in both user-level and
  system-level emulation modes

Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
Signed-off-by: Aleksey Titov <a.titov@yadro.com>
---
contrib/plugins/Makefile     |  1 +
contrib/plugins/memtrace.c   | 76 ++++++++++++++++++++++++++++++++++++
include/qemu/qemu-plugin.h   | 18 ++++++++-
plugins/api.c                | 16 ++++++++
plugins/qemu-plugins.symbols |  1 +
5 files changed, 111 insertions(+), 1 deletion(-)
create mode 100644 contrib/plugins/memtrace.c

diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index 23e0396687..cf27554616 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -21,6 +21,7 @@ NAMES += lockstep
NAMES += hwprofile
NAMES += cache
NAMES += drcov
+NAMES += memtrace
 SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
diff --git a/contrib/plugins/memtrace.c b/contrib/plugins/memtrace.c
new file mode 100644
index 0000000000..16c1553f47
--- /dev/null
+++ b/contrib/plugins/memtrace.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2023, Mikhail Tyutin <m.tyutin@yadro.com>
+ *
+ * Log all memory accesses including content of the access.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+
+static void vcpu_mem_access(uint32_t vcpuIndex, qemu_plugin_meminfo_t memInfo,
+                            uint64_t vaddr, void *userdata)
+{
+    unsigned size = 1 << qemu_plugin_mem_size_shift(memInfo);
+    char* memContent = g_malloc(size);
+    unsigned i;
+    GString* logLine = g_string_new(NULL);
+
+    qemu_plugin_vcpu_read_phys_mem(vcpuIndex, vaddr, memContent, size);
+
+    if(qemu_plugin_mem_is_store(memInfo)) {
+        g_string_append(logLine, "mem store");
+    }
+    else {
+        g_string_append(logLine, "mem load");
+    }
+
+    g_string_append_printf(logLine, " at 0x%08"PRIx64, vaddr);
+
+    g_string_append_printf(logLine, " size=%u content={",size);
+    for (i = 0; i < size; i++) {
+        if (i != 0) {
+            g_string_append(logLine, " ");
+        }
+        g_string_append_printf(logLine, "%02hhx", memContent[i]);
+    }
+    g_string_append(logLine, "}\n");
+
+    qemu_plugin_outs(logLine->str);
+
+    g_string_free(logLine, TRUE);
+    g_free(memContent);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n_insns;
+    size_t i;
+
+    n_insns = qemu_plugin_tb_n_insns(tb);
+    for (i = 0; i < n_insns; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+
+        qemu_plugin_register_vcpu_mem_cb(insn,
+                                     vcpu_mem_access,
+                                     QEMU_PLUGIN_CB_NO_REGS,
+                                     QEMU_PLUGIN_MEM_RW,
+                                     0);
+    }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+                        int argc, char **argv)
+{
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+
+    return 0;
+}
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index d0e9d03adf..866282cf7d 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -51,7 +51,7 @@ typedef uint64_t qemu_plugin_id_t;
 extern QEMU_PLUGIN_EXPORT int qemu_plugin_version;
-#define QEMU_PLUGIN_VERSION 1
+#define QEMU_PLUGIN_VERSION 2
 /**
  * struct qemu_info_t - system information for plugins
@@ -625,4 +625,20 @@ uint64_t qemu_plugin_end_code(void);
  */
uint64_t qemu_plugin_entry_code(void);
+/**
+ * qemu_plugin_vcpu_read_phys_mem() - reads guest's memory content
+ *
+ * @vcpu_index: vcpu index
+ * @addr: guest's virtual address
+ * @buf: destination buffer to read data to
+ * @len: number of bytes to read
+ *
+ * Adjusts address according to internal memory mapping and reads
+ * content of guest memory.
+ */
+void qemu_plugin_vcpu_read_phys_mem(unsigned int vcpu_index,
+                                    uint64_t addr,
+                                    void *buf,
+                                    uint64_t len);
+
#endif /* QEMU_QEMU_PLUGIN_H */
diff --git a/plugins/api.c b/plugins/api.c
index 2078b16edb..af4f177229 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -442,3 +442,19 @@ uint64_t qemu_plugin_entry_code(void)
#endif
     return entry;
}
+
+void qemu_plugin_vcpu_read_phys_mem(unsigned int vcpu_index,
+                                    uint64_t addr,
+                                    void *buf,
+                                    uint64_t len) {
+    CPUClass *cc;
+    CPUState *cpu;
+
+    cpu = qemu_get_cpu(vcpu_index);
+    cc = CPU_GET_CLASS(cpu);
+    if (cc->memory_rw_debug) {
+        cc->memory_rw_debug(cpu, addr, buf, len, false);
+    } else {
+        cpu_memory_rw_debug(cpu, addr, buf, len, false);
+    }
+}
\ No newline at end of file
diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
index 71f6c90549..f0ce8c730f 100644
--- a/plugins/qemu-plugins.symbols
+++ b/plugins/qemu-plugins.symbols
@@ -42,4 +42,5 @@
   qemu_plugin_tb_vaddr;
   qemu_plugin_uninstall;
   qemu_plugin_vcpu_for_each;
+  qemu_plugin_vcpu_read_phys_mem;
};
--
2.34.1

Re: [PATCH v2] TCG plugin API extension to read guest memory content by an address
Posted by Alex Bennée 1 year, 1 month ago
Mikhail Tyutin <m.tyutin@yadro.com> writes:

> TCG plugin API extension to read guest memory content. qemu_plugin_vcpu_read_phys_mem()
>
> function can be used by TCG plugin inside of qemu_plugin_vcpu_mem_cb_t callback to adjust
>
> received address according to internal memory mappings and read content of guest memory.
>
> Works for both user-level and system-level emulation modes.
>
>  
>
> What's new in v2:
>
> * Increment QEMU_PLUGIN_VERSION instead of custom define
>
> * Example of qemu_plugin_vcpu_read_phys_mem() usage in memtrace.c
>
> * Use cpu_memory_rw_debug() to get memory content in both user-level and
>
>   system-level emulation modes
>
>  
>
> Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
>
> Signed-off-by: Aleksey Titov <a.titov@yadro.com>

Not sure what happened with the formatting of this patch, I think there
is an html part getting in the way.

>
> ---
>
> contrib/plugins/Makefile     |  1 +
>
> contrib/plugins/memtrace.c   | 76 ++++++++++++++++++++++++++++++++++++
>
> include/qemu/qemu-plugin.h   | 18 ++++++++-
>
> plugins/api.c                | 16 ++++++++
>
> plugins/qemu-plugins.symbols |  1 +
>
> 5 files changed, 111 insertions(+), 1 deletion(-)
>
> create mode 100644 contrib/plugins/memtrace.c
>
>  
>
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>
> index 23e0396687..cf27554616 100644
>
> --- a/contrib/plugins/Makefile
>
> +++ b/contrib/plugins/Makefile
>
> @@ -21,6 +21,7 @@ NAMES += lockstep
>
> NAMES += hwprofile
>
> NAMES += cache
>
> NAMES += drcov
>
> +NAMES += memtrace
>
>  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
>
> diff --git a/contrib/plugins/memtrace.c b/contrib/plugins/memtrace.c
>
> new file mode 100644
>
> index 0000000000..16c1553f47
>
> --- /dev/null
>
> +++ b/contrib/plugins/memtrace.c
>
> @@ -0,0 +1,76 @@
>
> +/*
>
> + * Copyright (C) 2023, Mikhail Tyutin <m.tyutin@yadro.com>
>
> + *
>
> + * Log all memory accesses including content of the access.
>
> + * 
>
> + * License: GNU GPL, version 2 or later.
>
> + *   See the COPYING file in the top-level directory.
>
> + */
>
> +
>
> +#include <glib.h>
>
> +
>
> +#include <qemu-plugin.h>
>
> +
>
> +
>
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>
> +
>
> +
>
> +static void vcpu_mem_access(uint32_t vcpuIndex, qemu_plugin_meminfo_t memInfo,
>
> +                            uint64_t vaddr, void *userdata)
>
> +{
>
> +    unsigned size = 1 << qemu_plugin_mem_size_shift(memInfo);
>
> +    char* memContent = g_malloc(size);
>
> +    unsigned i;
>
> +    GString* logLine = g_string_new(NULL);
>
> +
>
> +    qemu_plugin_vcpu_read_phys_mem(vcpuIndex, vaddr, memContent,
> size);

So the problem with this approach is the memory value you read here may not be
the same as the value that was read by the instruction. This could
because of a few reasons:

  - an mmio write changes underlying memory layout
  - another thread changes memory after the access

I think a better way to get this information would be to register a new
type of call-back which can duplicate the value in the store/load and
pass it directly to the callback. It might even be worth just fixing up
the existing callback and breaking compatibility rather than having two
callback types?

We didn't do this originally as we were being cautious about any
attempts to use plugins to workaround the GPL for doing HW emulation -
however I don't think adding the memory values to the callbacks greatly
increases that risk.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
RE: [PATCH v2] TCG plugin API extension to read guest memory content by an address
Posted by Mikhail Tyutin 1 year, 1 month ago
> Not sure what happened with the formatting of this patch, I think there
> is an html part getting in the way.
I guess line ends were messed up somewhere on my side. Will try to figure out the root cause.


> > +    qemu_plugin_vcpu_read_phys_mem(vcpuIndex, vaddr, memContent,
> > size);
> 
> So the problem with this approach is the memory value you read here may not be
> the same as the value that was read by the instruction. This could
> because of a few reasons:
> 
>   - an mmio write changes underlying memory layout
>   - another thread changes memory after the access
> 
> I think a better way to get this information would be to register a new
> type of call-back which can duplicate the value in the store/load and
> pass it directly to the callback. It might even be worth just fixing up
> the existing callback and breaking compatibility rather than having two
> callback types?
> 
> We didn't do this originally as we were being cautious about any
> attempts to use plugins to workaround the GPL for doing HW emulation -
> however I don't think adding the memory values to the callbacks greatly
> increases that risk.
> 
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro

Do you mean concurrent access to the same memory block by multiple threads?

I think , for guest threads/cores if we observe mismatch of memory content read by a plugin and instruction
itself, then it should clearly indicate that guest software has true data race problem sitting somewhere
in its code. Otherwise other threads would wait on a synchronization object to let current thread
perform both memory operations (plugin callback + instruction). On the other hand, concurrent access
using atomic operation will indeed cause either plugin or instruction to read invalid memory content.

Isn’t it the same problem as we face in case of GDB attached to running Qemu instance (gdbserver) and
asking it to read some memory? How is it solved there?
Re: [PATCH v2] TCG plugin API extension to read guest memory content by an address
Posted by Alex Bennée 1 year, 1 month ago
Mikhail Tyutin <m.tyutin@yadro.com> writes:

>> Not sure what happened with the formatting of this patch, I think there
>> is an html part getting in the way.
> I guess line ends were messed up somewhere on my side. Will try to figure out the root cause.
>
>
>> > +    qemu_plugin_vcpu_read_phys_mem(vcpuIndex, vaddr, memContent,
>> > size);
>> 
>> So the problem with this approach is the memory value you read here may not be
>> the same as the value that was read by the instruction. This could
>> because of a few reasons:
>> 
>>   - an mmio write changes underlying memory layout
>>   - another thread changes memory after the access
>> 
>> I think a better way to get this information would be to register a new
>> type of call-back which can duplicate the value in the store/load and
>> pass it directly to the callback. It might even be worth just fixing up
>> the existing callback and breaking compatibility rather than having two
>> callback types?
>> 
>> We didn't do this originally as we were being cautious about any
>> attempts to use plugins to workaround the GPL for doing HW emulation -
>> however I don't think adding the memory values to the callbacks greatly
>> increases that risk.
>> 
>> --
>> Alex Bennée
>> Virtualisation Tech Lead @ Linaro
>
> Do you mean concurrent access to the same memory block by multiple
> threads?

Yes - although we also see MMU changes updating a mapping for a given
vaddr -> phys address.

>
> I think , for guest threads/cores if we observe mismatch of memory content read by a plugin and instruction
> itself, then it should clearly indicate that guest software has true data race problem sitting somewhere
> in its code. Otherwise other threads would wait on a synchronization object to let current thread
> perform both memory operations (plugin callback + instruction).

Other threads don't pause at all (unless you do something in the plugin
to force that)

> On the other hand, concurrent access
> using atomic operation will indeed cause either plugin or instruction to read invalid memory content.
>
> Isn’t it the same problem as we face in case of GDB attached to running Qemu instance (gdbserver) and
> asking it to read some memory? How is it solved there?

Yes and it's not solved except usually most interactions with the guest
during debugging are while the system is paused.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro
RE: [PATCH v2] TCG plugin API extension to read guest memory content by an address
Posted by Mikhail Tyutin 1 year, 1 month ago
> > Do you mean concurrent access to the same memory block by multiple
> > threads?
> 
> Yes - although we also see MMU changes updating a mapping for a given
> vaddr -> phys address.
> 
> >
> > I think , for guest threads/cores if we observe mismatch of memory content read by a plugin and instruction
> > itself, then it should clearly indicate that guest software has true data race problem sitting somewhere
> > in its code. Otherwise other threads would wait on a synchronization object to let current thread
> > perform both memory operations (plugin callback + instruction).
> 
> Other threads don't pause at all (unless you do something in the plugin
> to force that)

By correct multi-threaded code I mean that two concurrent accesses should
be synchronized by the application itself to ensure it correctness. For example
two thread access the same memory using a lock:

T1:
Lock         
  read mem
Unlock

T2:
Lock
  write mem
Unlock

If a plugin inserts memory callback at read/write mem instruction, it will be
implicitly synchronized with another thread.

On the other hand, if application misses the lock, it has data race regardless
of inserted callbacks. So, the plugin will get undefined content anyway.
T1                  T2
read mem    write mem

 
> > On the other hand, concurrent access
> > using atomic operation will indeed cause either plugin or instruction to read invalid memory content.
> >
> > Isn’t it the same problem as we face in case of GDB attached to running Qemu instance (gdbserver) and
> > asking it to read some memory? How is it solved there?
> 
> Yes and it's not solved except usually most interactions with the guest
> during debugging are while the system is paused.
> 
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro