[PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]

Paolo Bonzini posted 13 patches 1 day, 5 hours ago
[PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]
Posted by Paolo Bonzini 1 day, 5 hours ago
Remove the duplicate code by using the module_init! macro; at the same time,
simplify how module_init! is used, by taking inspiration from the implementation
of #[derive(Object)].

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 rust/qemu-api-macros/src/lib.rs  | 33 +++-------------
 rust/qemu-api/src/definitions.rs | 66 ++++++++++++++------------------
 2 files changed, 33 insertions(+), 66 deletions(-)

diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs
index 70e3f920460..a4bc5d01ee8 100644
--- a/rust/qemu-api-macros/src/lib.rs
+++ b/rust/qemu-api-macros/src/lib.rs
@@ -3,43 +3,20 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 use proc_macro::TokenStream;
-use quote::{format_ident, quote};
+use quote::quote;
 use syn::{parse_macro_input, DeriveInput};
 
 #[proc_macro_derive(Object)]
 pub fn derive_object(input: TokenStream) -> TokenStream {
     let input = parse_macro_input!(input as DeriveInput);
-
     let name = input.ident;
-    let module_static = format_ident!("__{}_LOAD_MODULE", name);
 
     let expanded = quote! {
-        #[allow(non_upper_case_globals)]
-        #[used]
-        #[cfg_attr(
-            not(any(target_vendor = "apple", target_os = "windows")),
-            link_section = ".init_array"
-        )]
-        #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
-        #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
-        pub static #module_static: extern "C" fn() = {
-            extern "C" fn __register() {
-                unsafe {
-                    ::qemu_api::bindings::type_register_static(&<#name as ::qemu_api::definitions::ObjectImpl>::TYPE_INFO);
-                }
+        ::qemu_api::module_init! {
+            MODULE_INIT_QOM => unsafe {
+                ::qemu_api::bindings::type_register_static(&<#name as ::qemu_api::definitions::ObjectImpl>::TYPE_INFO);
             }
-
-            extern "C" fn __load() {
-                unsafe {
-                    ::qemu_api::bindings::register_module_init(
-                        Some(__register),
-                        ::qemu_api::bindings::module_init_type::MODULE_INIT_QOM
-                    );
-                }
-            }
-
-            __load
-        };
+        }
     };
 
     TokenStream::from(expanded)
diff --git a/rust/qemu-api/src/definitions.rs b/rust/qemu-api/src/definitions.rs
index 3323a665d92..f180c38bfb2 100644
--- a/rust/qemu-api/src/definitions.rs
+++ b/rust/qemu-api/src/definitions.rs
@@ -29,51 +29,40 @@ pub trait Class {
 
 #[macro_export]
 macro_rules! module_init {
-    ($func:expr, $type:expr) => {
-        #[used]
-        #[cfg_attr(
-            not(any(target_vendor = "apple", target_os = "windows")),
-            link_section = ".init_array"
-        )]
-        #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
-        #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
-        pub static LOAD_MODULE: extern "C" fn() = {
-            extern "C" fn __load() {
-                unsafe {
-                    $crate::bindings::register_module_init(Some($func), $type);
-                }
-            }
-
-            __load
-        };
-    };
-    (qom: $func:ident => $body:block) => {
-        // NOTE: To have custom identifiers for the ctor func we need to either supply
-        // them directly as a macro argument or create them with a proc macro.
-        #[used]
-        #[cfg_attr(
-            not(any(target_vendor = "apple", target_os = "windows")),
-            link_section = ".init_array"
-        )]
-        #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
-        #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
-        pub static LOAD_MODULE: extern "C" fn() = {
-            extern "C" fn __load() {
-                unsafe extern "C" fn $func() {
+    ($type:ident => $body:block) => {
+        const _: () = {
+            #[used]
+            #[cfg_attr(
+                not(any(target_vendor = "apple", target_os = "windows")),
+                link_section = ".init_array"
+            )]
+            #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
+            #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
+            pub static LOAD_MODULE: extern "C" fn() = {
+                extern "C" fn init_fn() {
                     $body
                 }
 
-                unsafe {
-                    $crate::bindings::register_module_init(
-                        Some($func),
-                        $crate::bindings::module_init_type::MODULE_INIT_QOM,
-                    );
+                extern "C" fn ctor_fn() {
+                    unsafe {
+                        $crate::bindings::register_module_init(
+                            Some(init_fn),
+                            $crate::bindings::module_init_type::$type,
+                        );
+                    }
                 }
-            }
 
-            __load
+                ctor_fn
+            };
         };
     };
+
+    // shortcut because it's quite common that $body needs unsafe {}
+    ($type:ident => unsafe $body:block) => {
+        $crate::module_init! {
+            $type => { unsafe { $body } }
+        }
+    };
 }
 
 #[macro_export]
-- 
2.46.2
Re: [PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]
Posted by Junjie Mao 20 hours ago
Paolo Bonzini <pbonzini@redhat.com> writes:

> Remove the duplicate code by using the module_init! macro; at the same time,
> simplify how module_init! is used, by taking inspiration from the implementation
> of #[derive(Object)].
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Junjie Mao <junjie.mao@hotmail.com>

One minor comment below.

> ---
>  rust/qemu-api-macros/src/lib.rs  | 33 +++-------------
>  rust/qemu-api/src/definitions.rs | 66 ++++++++++++++------------------
>  2 files changed, 33 insertions(+), 66 deletions(-)
>
<snip>
> diff --git a/rust/qemu-api/src/definitions.rs b/rust/qemu-api/src/definitions.rs
> index 3323a665d92..f180c38bfb2 100644
> --- a/rust/qemu-api/src/definitions.rs
> +++ b/rust/qemu-api/src/definitions.rs
> @@ -29,51 +29,40 @@ pub trait Class {
>
>  #[macro_export]
>  macro_rules! module_init {
<snip>
> +    ($type:ident => $body:block) => {
> +        const _: () = {
> +            #[used]
> +            #[cfg_attr(
> +                not(any(target_vendor = "apple", target_os = "windows")),
> +                link_section = ".init_array"
> +            )]
> +            #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
> +            #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
> +            pub static LOAD_MODULE: extern "C" fn() = {
> +                extern "C" fn init_fn() {

init_fn() should be unsafe fn according to the signature of
register_module_init. Being unsafe fn also makes sense here because it
is the caller, not init_fn() itself, that is responsible for the safety of
the unsafe code in the body.

--
Best Regards
Junjie Mao

>                      $body
>                  }
>
> -                unsafe {
> -                    $crate::bindings::register_module_init(
> -                        Some($func),
> -                        $crate::bindings::module_init_type::MODULE_INIT_QOM,
> -                    );
> +                extern "C" fn ctor_fn() {
> +                    unsafe {
> +                        $crate::bindings::register_module_init(
> +                            Some(init_fn),
> +                            $crate::bindings::module_init_type::$type,
> +                        );
> +                    }
>                  }
> -            }
>
> -            __load
> +                ctor_fn
> +            };
>          };
>      };
> +
> +    // shortcut because it's quite common that $body needs unsafe {}
> +    ($type:ident => unsafe $body:block) => {
> +        $crate::module_init! {
> +            $type => { unsafe { $body } }
> +        }
> +    };
>  }
>
>  #[macro_export]
Re: [PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]
Posted by Paolo Bonzini 17 hours ago
On Tue, Oct 22, 2024 at 4:12 AM Junjie Mao <junjie.mao@hotmail.com> wrote:
>
>
> Paolo Bonzini <pbonzini@redhat.com> writes:
>
> > Remove the duplicate code by using the module_init! macro; at the same time,
> > simplify how module_init! is used, by taking inspiration from the implementation
> > of #[derive(Object)].
> >
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>
> Reviewed-by: Junjie Mao <junjie.mao@hotmail.com>
>
> One minor comment below.
>
> > ---
> >  rust/qemu-api-macros/src/lib.rs  | 33 +++-------------
> >  rust/qemu-api/src/definitions.rs | 66 ++++++++++++++------------------
> >  2 files changed, 33 insertions(+), 66 deletions(-)
> >
> <snip>
> > diff --git a/rust/qemu-api/src/definitions.rs b/rust/qemu-api/src/definitions.rs
> > index 3323a665d92..f180c38bfb2 100644
> > --- a/rust/qemu-api/src/definitions.rs
> > +++ b/rust/qemu-api/src/definitions.rs
> > @@ -29,51 +29,40 @@ pub trait Class {
> >
> >  #[macro_export]
> >  macro_rules! module_init {
> <snip>
> > +    ($type:ident => $body:block) => {
> > +        const _: () = {
> > +            #[used]
> > +            #[cfg_attr(
> > +                not(any(target_vendor = "apple", target_os = "windows")),
> > +                link_section = ".init_array"
> > +            )]
> > +            #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
> > +            #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
> > +            pub static LOAD_MODULE: extern "C" fn() = {
> > +                extern "C" fn init_fn() {
>
> init_fn() should be unsafe fn according to the signature of
> register_module_init.

I think it *can* be unsafe (which bindgen does by default). It's
always okay to pass a non-unsafe function as unsafe function pointer:

fn f() {
    println!("abc");
}

fn g(pf: unsafe fn()) {
    unsafe {
        pf();
    }
}

fn main() {
    g(f);
}

> Being unsafe fn also makes sense here because it
> is the caller, not init_fn() itself, that is responsible for the safety of
> the unsafe code in the body.

Isn't it the opposite? Since the caller of module_init! is responsible
for the safety, init_fn() itself can be safe. With
unsafe_op_in_unsafe_fn it's not a big deal; but without it, an unsafe
init_fn would hide what is safe and what is not in the argument of
module_init!.

It's also relevant in this respect that init_fn is called *after*
main(), only ctor_fn() is called before main.

Thanks,

Paolo
Re: [PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]
Posted by Junjie Mao 16 hours ago
Paolo Bonzini <pbonzini@redhat.com> writes:

> On Tue, Oct 22, 2024 at 4:12 AM Junjie Mao <junjie.mao@hotmail.com> wrote:
>> > +    ($type:ident => $body:block) => {
>> > +        const _: () = {
>> > +            #[used]
>> > +            #[cfg_attr(
>> > +                not(any(target_vendor = "apple", target_os = "windows")),
>> > +                link_section = ".init_array"
>> > +            )]
>> > +            #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
>> > +            #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
>> > +            pub static LOAD_MODULE: extern "C" fn() = {
>> > +                extern "C" fn init_fn() {
>>
>> init_fn() should be unsafe fn according to the signature of
>> register_module_init.
>
> I think it *can* be unsafe (which bindgen does by default). It's
> always okay to pass a non-unsafe function as unsafe function pointer:
>
> fn f() {
>     println!("abc");
> }
>
> fn g(pf: unsafe fn()) {
>     unsafe {
>         pf();
>     }
> }
>
> fn main() {
>     g(f);
> }
>
>> Being unsafe fn also makes sense here because it
>> is the caller, not init_fn() itself, that is responsible for the safety of
>> the unsafe code in the body.
>
> Isn't it the opposite? Since the caller of module_init! is responsible
> for the safety, init_fn() itself can be safe.

My understanding is that:

  1. init_fn() is called by QEMU QOM subsystem with certain timing
     (e.g., called after main()) and concurrency (e.g., all init_fn()
     are called sequentially) preconditions.

  2. The caller of module_init! is responsible for justifying the safety
     of their $body under the preconditions established in #1.

"unsafe fn" in Rust is designed to mark functions with safety
preconditions so that the compiler can raise an error if the caller
forgets that it has those preconditions to uphold [1].

Under that interpretation, a safe "fn init_fn()" reads that init_fn() is
safe to call without the preconditions mentioned in #1. That is rarely
the case to me.

Using "unsafe" to mark the responsibility of device backends in #2 looks
like repurposing the keyword. That may make more sense in this specific
context because:

  1. the callers of init_fn() are in C, so Rust compiler cannot really
     check them,

  2. the caller will by design upholds the safety preconditions
     regardless of whether a specific callback needs it or not, and

  3. without unsafe_op_in_unsafe_fn an unsafe fn hides unsafe operation
     in its body from the compiler.

If that's what we prefer, I would suggest add the considerations above
to the code as comments, so that future readers are not confused by the
usage of "unsafe" here.

[1] https://rust-lang.github.io/rfcs/2585-unsafe-block-in-unsafe-fn.html

> With unsafe_op_in_unsafe_fn it's not a big deal; but without it, an
> unsafe init_fn would hide what is safe and what is not in the argument
> of module_init!.
>
> It's also relevant in this respect that init_fn is called *after*
> main(), only ctor_fn() is called before main.
>
> Thanks,
>
> Paolo

--
Best Regards
Junjie Mao
Re: [PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]
Posted by Kevin Wolf an hour ago
Am 22.10.2024 um 08:00 hat Junjie Mao geschrieben:
> 
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
> > On Tue, Oct 22, 2024 at 4:12 AM Junjie Mao <junjie.mao@hotmail.com> wrote:
> >> > +    ($type:ident => $body:block) => {
> >> > +        const _: () = {
> >> > +            #[used]
> >> > +            #[cfg_attr(
> >> > +                not(any(target_vendor = "apple", target_os = "windows")),
> >> > +                link_section = ".init_array"
> >> > +            )]
> >> > +            #[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func")]
> >> > +            #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
> >> > +            pub static LOAD_MODULE: extern "C" fn() = {
> >> > +                extern "C" fn init_fn() {
> >>
> >> init_fn() should be unsafe fn according to the signature of
> >> register_module_init.
> >
> > I think it *can* be unsafe (which bindgen does by default). It's
> > always okay to pass a non-unsafe function as unsafe function pointer:
> >
> > fn f() {
> >     println!("abc");
> > }
> >
> > fn g(pf: unsafe fn()) {
> >     unsafe {
> >         pf();
> >     }
> > }
> >
> > fn main() {
> >     g(f);
> > }
> >
> >> Being unsafe fn also makes sense here because it
> >> is the caller, not init_fn() itself, that is responsible for the safety of
> >> the unsafe code in the body.
> >
> > Isn't it the opposite? Since the caller of module_init! is responsible
> > for the safety, init_fn() itself can be safe.
> 
> My understanding is that:
> 
>   1. init_fn() is called by QEMU QOM subsystem with certain timing
>      (e.g., called after main()) and concurrency (e.g., all init_fn()
>      are called sequentially) preconditions.

Though these conditions are not a matter of safety, but of correctness.

>   2. The caller of module_init! is responsible for justifying the safety
>      of their $body under the preconditions established in #1.
> 
> "unsafe fn" in Rust is designed to mark functions with safety
> preconditions so that the compiler can raise an error if the caller
> forgets that it has those preconditions to uphold [1].

I don't think we expect to have preconditions here that are required for
safety (in the sense with which the term is used in Rust).

But safe code is not automatically correct.

If you added "unsafe" for every function that requires some
preconditions to be met so that it functions correctly (instead of only
so that it doesn't have undefined behaviour on the language level), then
you would annotate almost every function as "unsafe".

I think the rule of thumb for marking a function unsafe is when you have
unsafe code inside of it whose safety (not correctness!) depends on a
condition that the caller must reason about because you can't guarantee
it in the function itself.

This macro should probably only be used with code that can guarantee the
safety of its unsafe blocks in itself. The C side of constructors can't
make many guarantees anyway, and there is nobody who would reason about
them.

Kevin


Re: [PATCH v2 08/13] rust: cleanup module_init!, use it from #[derive(Object)]
Posted by Paolo Bonzini 15 hours ago
On Tue, Oct 22, 2024 at 9:12 AM Junjie Mao <junjie.mao@hotmail.com> wrote:
> My understanding is that:
>
>   1. init_fn() is called by QEMU QOM subsystem with certain timing
>      (e.g., called after main()) and concurrency (e.g., all init_fn()
>      are called sequentially) preconditions.
>
>   2. The caller of module_init! is responsible for justifying the safety
>      of their $body under the preconditions established in #1.
>
> "unsafe fn" in Rust is designed to mark functions with safety
> preconditions so that the compiler can raise an error if the caller
> forgets that it has those preconditions to uphold [1].
>
> Under that interpretation, a safe "fn init_fn()" reads that init_fn() is
> safe to call without the preconditions mentioned in #1. That is rarely
> the case to me.
>
> Using "unsafe" to mark the responsibility of device backends in #2 looks
> like repurposing the keyword. That may make more sense in this specific
> context because:
>
>   1. the callers of init_fn() are in C, so Rust compiler cannot really
>      check them,
>
>   2. the caller will by design upholds the safety preconditions
>      regardless of whether a specific callback needs it or not, and

Or at least will try.

>   3. without unsafe_op_in_unsafe_fn an unsafe fn hides unsafe operation
>      in its body from the compiler.

4. In this specific case, init_fn is only used from ctor_fn and even
there only as a function pointer; it is not visible from outside. So
the "unsafe" marker's only purpose is documentation (and in fact, as
you point out in (3), it would even be harmful without
unsafe_op_in_unsafe_fn).

> If that's what we prefer, I would suggest add the considerations above
> to the code as comments, so that future readers are not confused by the
> usage of "unsafe" here.

Yes, I agree that since we're talking about a macro (not a function
which can itself be annotated with "unsafe") that lies at the C<->Rust
boundary, this is mostly a documentation issue.

In general the documentation of qemu-api and qemu-api-macros leaves
something to be desired. This is okay, but it is also the kind of
technical debt that we need to look at as soon as we can actually get
the thing to compile. :)

I'll add it shortly to https://wiki.qemu.org/Features/Rust.

Paolo