[PATCH] common: guest_physmap_add_page()'s return value needs checking

Jan Beulich posted 1 patch 2 years, 7 months ago
Failed in applying to current master (apply log)
[PATCH] common: guest_physmap_add_page()'s return value needs checking
Posted by Jan Beulich 2 years, 7 months ago
The function may fail; it is not correct to indicate "success" in this
case up the call stack. Mark the function must-check to prove all
cases have been caught (and no new ones will get introduced).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
In the grant-transfer case it is not really clear to me whether we can
stick to setting GTF_transfer_completed in the error case. Since a guest
may spin-wait for the flag to become set, simply not setting the flag is
not an option either. I was wondering whether we may want to slightly
alter (extend) the ABI and allow for a GTF_transfer_committed ->
GTF_transfer_completed transition (i.e. clearing GTF_transfer_committed
at the same time as setting GTF_transfer_completed).

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2394,7 +2394,7 @@ gnttab_transfer(
         {
             grant_entry_v1_t *sha = &shared_entry_v1(e->grant_table, gop.ref);
 
-            guest_physmap_add_page(e, _gfn(sha->frame), mfn, 0);
+            rc = guest_physmap_add_page(e, _gfn(sha->frame), mfn, 0);
             if ( !paging_mode_translate(e) )
                 sha->frame = mfn_x(mfn);
         }
@@ -2402,7 +2402,7 @@ gnttab_transfer(
         {
             grant_entry_v2_t *sha = &shared_entry_v2(e->grant_table, gop.ref);
 
-            guest_physmap_add_page(e, _gfn(sha->full_page.frame), mfn, 0);
+            rc = guest_physmap_add_page(e, _gfn(sha->full_page.frame), mfn, 0);
             if ( !paging_mode_translate(e) )
                 sha->full_page.frame = mfn_x(mfn);
         }
@@ -2415,7 +2415,7 @@ gnttab_transfer(
 
         rcu_unlock_domain(e);
 
-        gop.status = GNTST_okay;
+        gop.status = rc ? GNTST_general_error : GNTST_okay;
 
     copyback:
         if ( unlikely(__copy_field_to_guest(uop, &gop, status)) )
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -268,7 +268,8 @@ static void populate_physmap(struct memo
                 mfn = page_to_mfn(page);
             }
 
-            guest_physmap_add_page(d, _gfn(gpfn), mfn, a->extent_order);
+            if ( guest_physmap_add_page(d, _gfn(gpfn), mfn, a->extent_order) )
+                goto out;
 
             if ( !paging_mode_translate(d) &&
                  /* Inform the domain of the new page's machine address. */
@@ -765,8 +766,8 @@ static long memory_exchange(XEN_GUEST_HA
             }
 
             mfn = page_to_mfn(page);
-            guest_physmap_add_page(d, _gfn(gpfn), mfn,
-                                   exch.out.extent_order);
+            rc = guest_physmap_add_page(d, _gfn(gpfn), mfn,
+                                        exch.out.extent_order) ?: rc;
 
             if ( !paging_mode_translate(d) &&
                  __copy_mfn_to_guest_offset(exch.out.extent_start,
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -307,10 +307,9 @@ int guest_physmap_add_entry(struct domai
                             p2m_type_t t);
 
 /* Untyped version for RAM only, for compatibility */
-static inline int guest_physmap_add_page(struct domain *d,
-                                         gfn_t gfn,
-                                         mfn_t mfn,
-                                         unsigned int page_order)
+static inline int __must_check
+guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
+                       unsigned int page_order)
 {
     return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
 }
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -583,8 +583,8 @@ int guest_physmap_add_entry(struct domain *d, gfn_t gfn,
                             p2m_type_t t);
 
 /* Untyped version for RAM only, for compatibility and PV. */
-int guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
-                           unsigned int page_order);
+int __must_check guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
+                                        unsigned int page_order);
 
 /* Set a p2m range as populate-on-demand */
 int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,


Re: [PATCH] common: guest_physmap_add_page()'s return value needs checking
Posted by Andrew Cooper 2 years, 7 months ago
On 01/09/2021 17:06, Jan Beulich wrote:
> The function may fail; it is not correct to indicate "success" in this
> case up the call stack. Mark the function must-check to prove all
> cases have been caught (and no new ones will get introduced).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> In the grant-transfer case it is not really clear to me whether we can
> stick to setting GTF_transfer_completed in the error case. Since a guest
> may spin-wait for the flag to become set, simply not setting the flag is
> not an option either. I was wondering whether we may want to slightly
> alter (extend) the ABI and allow for a GTF_transfer_committed ->
> GTF_transfer_completed transition (i.e. clearing GTF_transfer_committed
> at the same time as setting GTF_transfer_completed).

Considering there are no production users of gnttab_transfer(), we can
do what we want.  It was introduced for (IIRC) netlink2 and never got
into production, and then we clobbered it almost entirely in an XSA
several years ago by restricting steal_page() to PV guests only.

As a consequence, we can do anything which seems sensible, and does not
necessarily need to be bound by a guest spinning on the bit.

The concept of gnttab_transfer() alone is crazy from an in-guest memory
management point of view.  We could alternatively save our future selves
more trouble by just Kconfig'ing it out now, deleting it in several
releases time, and fogetting about the problem as nothing will break in
practice.

~Andrew


Re: [PATCH] common: guest_physmap_add_page()'s return value needs checking
Posted by Jan Beulich 2 years, 7 months ago
On 01.09.2021 22:02, Andrew Cooper wrote:
> On 01/09/2021 17:06, Jan Beulich wrote:
>> The function may fail; it is not correct to indicate "success" in this
>> case up the call stack. Mark the function must-check to prove all
>> cases have been caught (and no new ones will get introduced).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> In the grant-transfer case it is not really clear to me whether we can
>> stick to setting GTF_transfer_completed in the error case. Since a guest
>> may spin-wait for the flag to become set, simply not setting the flag is
>> not an option either. I was wondering whether we may want to slightly
>> alter (extend) the ABI and allow for a GTF_transfer_committed ->
>> GTF_transfer_completed transition (i.e. clearing GTF_transfer_committed
>> at the same time as setting GTF_transfer_completed).
> 
> Considering there are no production users of gnttab_transfer(), we can
> do what we want.  It was introduced for (IIRC) netlink2 and never got
> into production, and then we clobbered it almost entirely in an XSA
> several years ago by restricting steal_page() to PV guests only.
> 
> As a consequence, we can do anything which seems sensible, and does not
> necessarily need to be bound by a guest spinning on the bit.

Is this a "yes, let's go that route" then? Or rather leaving it to me,
i.e. translating "we can do anything which seems sensible" to "feel free
to do anything which seems sensible"? Which might as well be what is
there right now, and hence there could be the implied question of
whether your reply could be translated to an ack.

> The concept of gnttab_transfer() alone is crazy from an in-guest memory
> management point of view.  We could alternatively save our future selves
> more trouble by just Kconfig'ing it out now, deleting it in several
> releases time, and fogetting about the problem as nothing will break in
> practice.

I might ack such an initial patch. I might even consider making one
myself as long as it's agreed that the option will need to default to
y. I might also ack such a subsequent patch. But I would not want to
be the one to propose a patch removing functionality. I think I did say
more than once in the past that I don't think we can validly remove
anything from the public interface, as we will never be able to _prove_
there's no user anywhere. An exception might only be in cases where we
can prove certain functionality could never have been used successfully
for its intended or any other purpose. (For example, recently I've
[again] been considering to fully disable XENMEM_increase_reservation
for translated guests, rather than just leaving it useless by not
reporting back the allocated MFNs.)

Jan


Ping: [PATCH] common: guest_physmap_add_page()'s return value needs checking
Posted by Jan Beulich 2 years, 7 months ago
On 01.09.2021 18:06, Jan Beulich wrote:
> The function may fail; it is not correct to indicate "success" in this
> case up the call stack. Mark the function must-check to prove all
> cases have been caught (and no new ones will get introduced).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> In the grant-transfer case it is not really clear to me whether we can
> stick to setting GTF_transfer_completed in the error case. Since a guest
> may spin-wait for the flag to become set, simply not setting the flag is
> not an option either. I was wondering whether we may want to slightly
> alter (extend) the ABI and allow for a GTF_transfer_committed ->
> GTF_transfer_completed transition (i.e. clearing GTF_transfer_committed
> at the same time as setting GTF_transfer_completed).

While I did get a reply from Andrew on this additional remark, the code
change itself has remained un-acked and un-responded to, despite imo
being pretty straightforward. May I please as for an ack or otherwise
an indication of what needs to be changed?

Jan

> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -2394,7 +2394,7 @@ gnttab_transfer(
>          {
>              grant_entry_v1_t *sha = &shared_entry_v1(e->grant_table, gop.ref);
>  
> -            guest_physmap_add_page(e, _gfn(sha->frame), mfn, 0);
> +            rc = guest_physmap_add_page(e, _gfn(sha->frame), mfn, 0);
>              if ( !paging_mode_translate(e) )
>                  sha->frame = mfn_x(mfn);
>          }
> @@ -2402,7 +2402,7 @@ gnttab_transfer(
>          {
>              grant_entry_v2_t *sha = &shared_entry_v2(e->grant_table, gop.ref);
>  
> -            guest_physmap_add_page(e, _gfn(sha->full_page.frame), mfn, 0);
> +            rc = guest_physmap_add_page(e, _gfn(sha->full_page.frame), mfn, 0);
>              if ( !paging_mode_translate(e) )
>                  sha->full_page.frame = mfn_x(mfn);
>          }
> @@ -2415,7 +2415,7 @@ gnttab_transfer(
>  
>          rcu_unlock_domain(e);
>  
> -        gop.status = GNTST_okay;
> +        gop.status = rc ? GNTST_general_error : GNTST_okay;
>  
>      copyback:
>          if ( unlikely(__copy_field_to_guest(uop, &gop, status)) )
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -268,7 +268,8 @@ static void populate_physmap(struct memo
>                  mfn = page_to_mfn(page);
>              }
>  
> -            guest_physmap_add_page(d, _gfn(gpfn), mfn, a->extent_order);
> +            if ( guest_physmap_add_page(d, _gfn(gpfn), mfn, a->extent_order) )
> +                goto out;
>  
>              if ( !paging_mode_translate(d) &&
>                   /* Inform the domain of the new page's machine address. */
> @@ -765,8 +766,8 @@ static long memory_exchange(XEN_GUEST_HA
>              }
>  
>              mfn = page_to_mfn(page);
> -            guest_physmap_add_page(d, _gfn(gpfn), mfn,
> -                                   exch.out.extent_order);
> +            rc = guest_physmap_add_page(d, _gfn(gpfn), mfn,
> +                                        exch.out.extent_order) ?: rc;
>  
>              if ( !paging_mode_translate(d) &&
>                   __copy_mfn_to_guest_offset(exch.out.extent_start,
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -307,10 +307,9 @@ int guest_physmap_add_entry(struct domai
>                              p2m_type_t t);
>  
>  /* Untyped version for RAM only, for compatibility */
> -static inline int guest_physmap_add_page(struct domain *d,
> -                                         gfn_t gfn,
> -                                         mfn_t mfn,
> -                                         unsigned int page_order)
> +static inline int __must_check
> +guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
> +                       unsigned int page_order)
>  {
>      return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
>  }
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -583,8 +583,8 @@ int guest_physmap_add_entry(struct domain *d, gfn_t gfn,
>                              p2m_type_t t);
>  
>  /* Untyped version for RAM only, for compatibility and PV. */
> -int guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
> -                           unsigned int page_order);
> +int __must_check guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
> +                                        unsigned int page_order);
>  
>  /* Set a p2m range as populate-on-demand */
>  int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
> 
>