This commit adds a syscall filter API to the TCG plugin API set.
Plugins can register a filter callback with QEMU to decide whether
to intercept a syscall, handle it themselves, and bypass the QEMU
syscall handler.
Signed-off-by: Ziyang Zhang <functioner@sjtu.edu.cn>
---
include/qemu/plugin-event.h | 1 +
include/qemu/plugin.h | 28 ++++++++++++++++++++--------
include/qemu/qemu-plugin.h | 24 ++++++++++++++++++++++++
include/user/syscall-trace.h | 16 ++++++++++++++++
linux-user/syscall.c | 7 +++++--
plugins/api.c | 7 +++++++
plugins/core.c | 34 ++++++++++++++++++++++++++++++++++
7 files changed, 107 insertions(+), 10 deletions(-)
diff --git a/include/qemu/plugin-event.h b/include/qemu/plugin-event.h
index 7056d8427b..bbb1c2b91f 100644
--- a/include/qemu/plugin-event.h
+++ b/include/qemu/plugin-event.h
@@ -20,6 +20,7 @@ enum qemu_plugin_event {
QEMU_PLUGIN_EV_VCPU_SYSCALL_RET,
QEMU_PLUGIN_EV_FLUSH,
QEMU_PLUGIN_EV_ATEXIT,
+ QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER,
QEMU_PLUGIN_EV_MAX, /* total number of plugin events we support */
};
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index f355c7cb8a..adba2ac49c 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -55,14 +55,15 @@ void qemu_plugin_opt_parse(const char *optstr, QemuPluginList *head);
int qemu_plugin_load_list(QemuPluginList *head, Error **errp);
union qemu_plugin_cb_sig {
- qemu_plugin_simple_cb_t simple;
- qemu_plugin_udata_cb_t udata;
- qemu_plugin_vcpu_simple_cb_t vcpu_simple;
- qemu_plugin_vcpu_udata_cb_t vcpu_udata;
- qemu_plugin_vcpu_tb_trans_cb_t vcpu_tb_trans;
- qemu_plugin_vcpu_mem_cb_t vcpu_mem;
- qemu_plugin_vcpu_syscall_cb_t vcpu_syscall;
- qemu_plugin_vcpu_syscall_ret_cb_t vcpu_syscall_ret;
+ qemu_plugin_simple_cb_t simple;
+ qemu_plugin_udata_cb_t udata;
+ qemu_plugin_vcpu_simple_cb_t vcpu_simple;
+ qemu_plugin_vcpu_udata_cb_t vcpu_udata;
+ qemu_plugin_vcpu_tb_trans_cb_t vcpu_tb_trans;
+ qemu_plugin_vcpu_mem_cb_t vcpu_mem;
+ qemu_plugin_vcpu_syscall_cb_t vcpu_syscall;
+ qemu_plugin_vcpu_syscall_ret_cb_t vcpu_syscall_ret;
+ qemu_plugin_vcpu_syscall_filter_cb_t vcpu_syscall_filter;
void *generic;
};
@@ -165,6 +166,10 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
uint64_t a6, uint64_t a7, uint64_t a8);
void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
+/*
+ * Offer a syscall to the registered plugin filter callbacks.  Returns true
+ * if a callback filtered the syscall, in which case that callback is
+ * expected to have stored the syscall result through @ret.
+ */
+bool
+qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
+                                uint64_t a2, uint64_t a3, uint64_t a4,
+                                uint64_t a5, uint64_t a6, uint64_t a7,
+                                uint64_t a8, uint64_t *ret);
void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
uint64_t value_low,
@@ -267,6 +272,13 @@ static inline
void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
{ }
+/*
+ * No-op stub: reports that the syscall was not filtered and leaves *ret
+ * untouched.  It must return a value explicitly: the caller tests the
+ * result, and falling off the end of a non-void function is undefined
+ * behaviour.
+ */
+static inline bool
+qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
+                                uint64_t a2, uint64_t a3, uint64_t a4,
+                                uint64_t a5, uint64_t a6, uint64_t a7,
+                                uint64_t a8, uint64_t *ret)
+{
+    return false;
+}
+
static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
uint64_t value_low,
uint64_t value_high,
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index c450106af1..719b070e32 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -738,6 +738,25 @@ typedef void
uint64_t a3, uint64_t a4, uint64_t a5,
uint64_t a6, uint64_t a7, uint64_t a8);
+/**
+ * typedef qemu_plugin_vcpu_syscall_filter_cb_t - vCPU syscall filter callback
+ * function type
+ * @id: the ID of the plugin the callback belongs to
+ * @vcpu_index: the executing vCPU
+ * @num: the syscall number
+ * @a1-a8: the syscall arguments
+ * @ret: the address of the syscall return value, set this if filtered; the
+ *       stored value is only used when the callback returns true
+ *
+ * Returns: true if you want to filter this syscall (i.e. stop it being handled
+ * further), otherwise return false.
+ */
+typedef bool
+(*qemu_plugin_vcpu_syscall_filter_cb_t)(qemu_plugin_id_t id,
+ unsigned int vcpu_index,
+ int64_t num, uint64_t a1, uint64_t a2,
+ uint64_t a3, uint64_t a4, uint64_t a5,
+ uint64_t a6, uint64_t a7, uint64_t a8,
+ uint64_t *ret);
+
QEMU_PLUGIN_API
void qemu_plugin_register_vcpu_syscall_cb(qemu_plugin_id_t id,
qemu_plugin_vcpu_syscall_cb_t cb);
@@ -751,6 +770,11 @@ void
qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
qemu_plugin_vcpu_syscall_ret_cb_t cb);
+/**
+ * qemu_plugin_register_vcpu_syscall_filter_cb() - register a syscall filter
+ * callback
+ * @id: plugin ID
+ * @cb: callback of type qemu_plugin_vcpu_syscall_filter_cb_t
+ *
+ * The @cb function is handed the syscall number and arguments. If it returns
+ * true the syscall is not handled any further, and the value it stored
+ * through its ret pointer is used as the syscall result.
+ */
+QEMU_PLUGIN_API
+void
+qemu_plugin_register_vcpu_syscall_filter_cb(qemu_plugin_id_t id,
+ qemu_plugin_vcpu_syscall_filter_cb_t cb);
+
/**
* qemu_plugin_insn_disas() - return disassembly string for instruction
diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h
index 9bd7ca19c8..34f7d6c7d4 100644
--- a/include/user/syscall-trace.h
+++ b/include/user/syscall-trace.h
@@ -39,5 +39,21 @@ static inline void record_syscall_return(CPUState *cpu, int num, abi_long ret)
gdb_syscall_return(cpu, num);
}
+/*
+ * Give the plugin syscall filters a chance to claim a syscall.
+ *
+ * Returns true when a filter claimed it; *ret then receives the value the
+ * filter produced (converted from uint64_t to abi_long).  Returns false, with
+ * *ret left untouched, when no filter claimed the syscall.
+ */
+static inline bool send_through_syscall_filters(CPUState *cpu, int num,
+                                                abi_long arg1, abi_long arg2,
+                                                abi_long arg3, abi_long arg4,
+                                                abi_long arg5, abi_long arg6,
+                                                abi_long arg7, abi_long arg8,
+                                                abi_long *ret)
+{
+    uint64_t plugin_ret = 0;
+
+    if (!qemu_plugin_vcpu_syscall_filter(cpu, num, arg1, arg2, arg3, arg4,
+                                         arg5, arg6, arg7, arg8,
+                                         &plugin_ret)) {
+        return false;
+    }
+
+    *ret = plugin_ret;
+    return true;
+}
+
#endif /* SYSCALL_TRACE_H */
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d78b2029fa..b8225f838f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -14084,8 +14084,11 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
print_syscall(cpu_env, num, arg1, arg2, arg3, arg4, arg5, arg6);
}
- ret = do_syscall1(cpu_env, num, arg1, arg2, arg3, arg4,
- arg5, arg6, arg7, arg8);
+ if (!send_through_syscall_filters(cpu, num, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, &ret)) {
+ ret = do_syscall1(cpu_env, num, arg1, arg2, arg3, arg4,
+ arg5, arg6, arg7, arg8);
+ }
if (unlikely(qemu_loglevel_mask(LOG_STRACE))) {
print_syscall_ret(cpu_env, num, ret, arg1, arg2,
diff --git a/plugins/api.c b/plugins/api.c
index eac04cc1f6..478d0c8889 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -208,6 +208,13 @@ qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_SYSCALL_RET, cb);
}
+/*
+ * Plugin-facing registration entry point: hands @cb to plugin_register_cb()
+ * under the QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER event for plugin @id.
+ */
+void
+qemu_plugin_register_vcpu_syscall_filter_cb(qemu_plugin_id_t id,
+ qemu_plugin_vcpu_syscall_filter_cb_t cb)
+{
+ plugin_register_cb(id, QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER, cb);
+}
+
/*
* Plugin Queries
*
diff --git a/plugins/core.c b/plugins/core.c
index ead09fd2f1..1b2f875fb1 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -538,6 +538,40 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
}
}
+/*
+ * Run the syscall filter callbacks registered for
+ * QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER on @cpu.
+ *
+ * Callbacks are invoked in list order, each with register access enabled
+ * (QEMU_PLUGIN_CB_RW_REGS) for the duration of the call.  Iteration stops at
+ * the first callback that returns true; later callbacks are not invoked.
+ *
+ * Returns true if a callback filtered the syscall (that callback was given
+ * @ret to store the result), false if the event is not enabled for @cpu or
+ * no callback filtered it.
+ *
+ * Disable CFI checks.
+ * The callback function has been loaded from an external library so we do not
+ * have type information
+ */
+QEMU_DISABLE_CFI
+bool
+qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
+                                uint64_t a2, uint64_t a3, uint64_t a4,
+                                uint64_t a5, uint64_t a6, uint64_t a7,
+                                uint64_t a8, uint64_t *ret)
+{
+    struct qemu_plugin_cb *cb, *next;
+    enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER;
+
+    if (!test_bit(ev, cpu->plugin_state->event_mask)) {
+        return false;
+    }
+
+    QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
+        bool filtered;
+
+        /* Register access is only granted while the callback runs. */
+        qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_RW_REGS);
+        filtered = cb->f.vcpu_syscall_filter(cb->ctx->id, cpu->cpu_index,
+                                             num, a1, a2, a3, a4, a5,
+                                             a6, a7, a8, ret);
+        qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
+
+        if (filtered) {
+            return true;
+        }
+    }
+    return false;
+}
+
void qemu_plugin_vcpu_idle_cb(CPUState *cpu)
{
/* idle and resume cb may be called before init, ignore in this case */
--
2.25.1
On 10/22/25 01:53, Ziyang Zhang wrote:
> @@ -165,6 +166,10 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
> uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
> uint64_t a6, uint64_t a7, uint64_t a8);
> void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
> +bool
> +qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
> + uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
> + uint64_t a6, uint64_t a7, uint64_t a8, uint64_t *ret);
The second and third lines should be indented just past the ( on the first line, i.e. aligned
with CPUState.
> +static inline bool
> +qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
> + uint64_t a2, uint64_t a3, uint64_t a4,
> + uint64_t a5, uint64_t a6, uint64_t a7,
> + uint64_t a8, uint64_t *ret)
Like this.
> +typedef bool
> +(*qemu_plugin_vcpu_syscall_filter_cb_t)(qemu_plugin_id_t id,
> + unsigned int vcpu_index,
> + int64_t num, uint64_t a1, uint64_t a2,
> + uint64_t a3, uint64_t a4, uint64_t a5,
> + uint64_t a6, uint64_t a7, uint64_t a8,
> + uint64_t *ret);
Likewise.
> +static inline bool send_through_syscall_filters(CPUState *cpu, int num,
> + abi_long arg1, abi_long arg2,
> + abi_long arg3, abi_long arg4,
> + abi_long arg5, abi_long arg6,
> + abi_long arg7, abi_long arg8, abi_long *ret)
Do not mark inline; let the compiler decide.
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index d78b2029fa..b8225f838f 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -14084,8 +14084,11 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
> print_syscall(cpu_env, num, arg1, arg2, arg3, arg4, arg5, arg6);
> }
>
> - ret = do_syscall1(cpu_env, num, arg1, arg2, arg3, arg4,
> - arg5, arg6, arg7, arg8);
> + if (!send_through_syscall_filters(cpu, num, arg1, arg2, arg3, arg4, arg5,
> + arg6, arg7, arg8, &ret)) {
Incorrect indent.
> + ret = do_syscall1(cpu_env, num, arg1, arg2, arg3, arg4,
> + arg5, arg6, arg7, arg8);
Likewise.
> diff --git a/plugins/core.c b/plugins/core.c
> index ead09fd2f1..1b2f875fb1 100644
> --- a/plugins/core.c
> +++ b/plugins/core.c
> @@ -538,6 +538,40 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
> }
> }
>
> +/*
> + * Disable CFI checks.
> + * The callback function has been loaded from an external library so we do not
> + * have type information
> + */
> +QEMU_DISABLE_CFI
> +bool
> +qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
> + uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
> + uint64_t a6, uint64_t a7, uint64_t a8, uint64_t *ret)
Likewise.
> +{
> + struct qemu_plugin_cb *cb, *next;
> + enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER;
> +
> + if (!test_bit(ev, cpu->plugin_state->event_mask)) {
> + return false;
> + }
> +
> + bool filtered = false;
> + QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
> + qemu_plugin_vcpu_syscall_filter_cb_t func = cb->f.vcpu_syscall_filter;
> +
> + qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_RW_REGS);
> + if (func(cb->ctx->id, cpu->cpu_index, num, a1, a2, a3, a4,
> + a5, a6, a7, a8, ret)) {
> + filtered = true;
> + qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
> + break;
> + }
> + qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
> + }
> + return filtered;
> +}
The loop is better written
QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
bool filtered;
qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_RW_REGS);
filtered = cb->f.vcpu_syscall_filter(cb->ctx->id, cpu->cpu_index,
num, a1, a2, a3, a4, a5,
a6, a7, a8, ret);
qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
if (filtered) {
return true;
}
}
return false;
r~
Hi Richard and Alex,
On 2025-10-22 18:00, Richard Henderson wrote:
> On 10/22/25 01:53, Ziyang Zhang wrote:
>> @@ -165,6 +166,10 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
>> uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
>> uint64_t a6, uint64_t a7, uint64_t a8);
>> void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
>> +bool
>> +qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
>> + uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
>> + uint64_t a6, uint64_t a7, uint64_t a8, uint64_t *ret);
>
> The second and third lines should be indented just past the ( on the first line, i.e. aligned
> with CPUState.
>
>> +static inline bool
>> +qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
>> + uint64_t a2, uint64_t a3, uint64_t a4,
>> + uint64_t a5, uint64_t a6, uint64_t a7,
>> + uint64_t a8, uint64_t *ret)
>
> Like this.
>
>> +typedef bool
>> +(*qemu_plugin_vcpu_syscall_filter_cb_t)(qemu_plugin_id_t id,
>> + unsigned int vcpu_index,
>> + int64_t num, uint64_t a1, uint64_t a2,
>> + uint64_t a3, uint64_t a4, uint64_t a5,
>> + uint64_t a6, uint64_t a7, uint64_t a8,
>> + uint64_t *ret);
>
> Likewise.
>
>> +static inline bool send_through_syscall_filters(CPUState *cpu, int num,
>> + abi_long arg1, abi_long arg2,
>> + abi_long arg3, abi_long arg4,
>> + abi_long arg5, abi_long arg6,
>> + abi_long arg7, abi_long arg8, abi_long *ret)
>
> Do not mark inline; let the compiler decide.
>
>> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
>> index d78b2029fa..b8225f838f 100644
>> --- a/linux-user/syscall.c
>> +++ b/linux-user/syscall.c
>> @@ -14084,8 +14084,11 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
>> print_syscall(cpu_env, num, arg1, arg2, arg3, arg4, arg5, arg6);
>> }
>>
>> - ret = do_syscall1(cpu_env, num, arg1, arg2, arg3, arg4,
>> - arg5, arg6, arg7, arg8);
>> + if (!send_through_syscall_filters(cpu, num, arg1, arg2, arg3, arg4, arg5,
>> + arg6, arg7, arg8, &ret)) {
>
> Incorrect indent.
>
>> + ret = do_syscall1(cpu_env, num, arg1, arg2, arg3, arg4,
>> + arg5, arg6, arg7, arg8);
>
> Likewise.
>
>> diff --git a/plugins/core.c b/plugins/core.c
>> index ead09fd2f1..1b2f875fb1 100644
>> --- a/plugins/core.c
>> +++ b/plugins/core.c
>> @@ -538,6 +538,40 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
>> }
>> }
>>
>> +/*
>> + * Disable CFI checks.
>> + * The callback function has been loaded from an external library so we do not
>> + * have type information
>> + */
>> +QEMU_DISABLE_CFI
>> +bool
>> +qemu_plugin_vcpu_syscall_filter(CPUState *cpu, int64_t num, uint64_t a1,
>> + uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5,
>> + uint64_t a6, uint64_t a7, uint64_t a8, uint64_t *ret)
>
> Likewise.
>
>> +{
>> + struct qemu_plugin_cb *cb, *next;
>> + enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_SYSCALL_FILTER;
>> +
>> + if (!test_bit(ev, cpu->plugin_state->event_mask)) {
>> + return false;
>> + }
>> +
>> + bool filtered = false;
>> + QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
>> + qemu_plugin_vcpu_syscall_filter_cb_t func = cb->f.vcpu_syscall_filter;
>> +
>> + qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_RW_REGS);
>> + if (func(cb->ctx->id, cpu->cpu_index, num, a1, a2, a3, a4,
>> + a5, a6, a7, a8, ret)) {
>> + filtered = true;
>> + qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
>> + break;
>> + }
>> + qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
>> + }
>> + return filtered;
>> +}
>
> The loop is better written
>
> QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
> bool filtered;
>
> qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_RW_REGS);
> filtered = cb->f.vcpu_syscall_filter(cb->ctx->id, cpu->cpu_index,
> num, a1, a2, a3, a4, a5,
> a6, a7, a8, ret);
> qemu_plugin_set_cb_flags(cpu, QEMU_PLUGIN_CB_NO_REGS);
>
> if (filtered) {
> return true;
> }
> }
> return false;
>
>
> r~
Thanks for the review.
Beyond the code style, are you open to accepting such functionality as
part of the plugin API?
I think it's simple and powerful enough to justify having it upstream.
Plus, it will solve issues other than the integration with Lorelei.
Regards,
Pierrick
On 10/27/25 08:14, Pierrick Bouvier wrote:
> Thanks for the review.
> Beyond the code style, are you open to accept such a functionality as part of API plugins?
Yes.
r~
On 10/28/25 19:17, Richard Henderson wrote:
>> Thanks for the review.
>> Beyond the code style, are you open to accept such a functionality as part of API plugins?
>
> Yes.
Thank you for your agreement!
Considering that our ultimate goal is to enable QEMU to support all features of Lorelei, the current syscall filter can only play a limited role. Here are some features currently required by Lorelei:
1. We need to bypass checks such as `pageflags` in `accel/tcg/user-exec.c`. This is because the host library may provide memory blocks to the guest program, and these memory blocks cannot be recorded by the page tables emulated by `qemu-user`. As a result, errors may occur when invoking syscalls like `read/write`.
2. The `cpu_loop` should support recursive invocation and exit. This is necessary when the host library needs to execute guest callbacks, and the recursively invoked `cpu_loop` should be able to return after the callback finishes.
3. Since the host library may create new threads and execute guest callbacks within those threads, the `thread_cpu` is `NULL` when the callback is first executed. Therefore, a mechanism is required to create a `CPUState` when `thread_cpu` is NULL.
Do you have any suggestions about how to implement these features?
© 2016 - 2025 Red Hat, Inc.