It's at least odd to check counters which aren't going to be
incremented. And it's also not helpful to use open-coded literal numbers
in these checks.
Calculate the increment values first and derive from them the mask to
use in the checks.
Also move the pin count checks ahead of the calculation of the status
(and for copy also sha2) pointers: They're not needed in the failure
cases, and this way the compiler may also have an easier time keeping
the variables at least transiently in registers for the subsequent uses.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -323,22 +323,25 @@ shared_entry_header(struct grant_table *
/* Active grant entry - used for shadowing GTF_permit_access grants. */
struct active_grant_entry {
uint32_t pin; /* Reference count information: */
+ /* Width of the individual counter fields. */
+#define GNTPIN_cntr_width 8
+#define GNTPIN_cntr_mask ((1U << GNTPIN_cntr_width) - 1)
/* Count of writable host-CPU mappings. */
#define GNTPIN_hstw_shift 0
#define GNTPIN_hstw_inc (1U << GNTPIN_hstw_shift)
-#define GNTPIN_hstw_mask (0xFFU << GNTPIN_hstw_shift)
+#define GNTPIN_hstw_mask (GNTPIN_cntr_mask << GNTPIN_hstw_shift)
/* Count of read-only host-CPU mappings. */
-#define GNTPIN_hstr_shift 8
+#define GNTPIN_hstr_shift (GNTPIN_hstw_shift + GNTPIN_cntr_width)
#define GNTPIN_hstr_inc (1U << GNTPIN_hstr_shift)
-#define GNTPIN_hstr_mask (0xFFU << GNTPIN_hstr_shift)
+#define GNTPIN_hstr_mask (GNTPIN_cntr_mask << GNTPIN_hstr_shift)
/* Count of writable device-bus mappings. */
-#define GNTPIN_devw_shift 16
+#define GNTPIN_devw_shift (GNTPIN_hstr_shift + GNTPIN_cntr_width)
#define GNTPIN_devw_inc (1U << GNTPIN_devw_shift)
-#define GNTPIN_devw_mask (0xFFU << GNTPIN_devw_shift)
+#define GNTPIN_devw_mask (GNTPIN_cntr_mask << GNTPIN_devw_shift)
/* Count of read-only device-bus mappings. */
-#define GNTPIN_devr_shift 24
+#define GNTPIN_devr_shift (GNTPIN_devw_shift + GNTPIN_cntr_width)
#define GNTPIN_devr_inc (1U << GNTPIN_devr_shift)
-#define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift)
+#define GNTPIN_devr_mask (GNTPIN_cntr_mask << GNTPIN_devr_shift)
domid_t domid; /* Domain being granted access. */
unsigned int start:15; /* For sub-page grants, the start offset
@@ -988,7 +991,8 @@ map_grant_ref(
mfn_t mfn;
struct page_info *pg = NULL;
int rc = GNTST_okay;
- unsigned int cache_flags, clear_flags = 0, refcnt = 0, typecnt = 0;
+ unsigned int cache_flags, clear_flags = 0, refcnt = 0, typecnt = 0,
+ pin_incr = 0;
bool host_map_created = false;
struct active_grant_entry *act = NULL;
struct grant_mapping *mt;
@@ -998,7 +1002,14 @@ map_grant_ref(
ld = current->domain;
- if ( unlikely((op->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
+ if ( op->flags & GNTMAP_device_map )
+ pin_incr += (op->flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+ : GNTPIN_devw_inc;
+ if ( op->flags & GNTMAP_host_map )
+ pin_incr += (op->flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
+ : GNTPIN_hstw_inc;
+
+ if ( unlikely(!pin_incr) )
{
gdprintk(XENLOG_INFO, "Bad flags in grant map op: %x\n", op->flags);
op->status = GNTST_bad_gntref;
@@ -1052,19 +1063,19 @@ map_grant_ref(
shah = shared_entry_header(rgt, ref);
act = active_entry_acquire(rgt, ref);
- /* Make sure we do not access memory speculatively */
- status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags
- : &status_entry(rgt, ref);
-
/* If already pinned, check the active domid and avoid refcnt overflow. */
if ( act->pin &&
((act->domid != ld->domain_id) ||
- (act->pin & 0x80808080U) != 0 ||
+ (act->pin & (pin_incr << (GNTPIN_cntr_width - 1))) ||
(act->is_sub_page)) )
PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
+ /* Make sure we do not access memory speculatively */
+ status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags
+ : &status_entry(rgt, ref);
+
if ( !act->pin ||
(!(op->flags & GNTMAP_readonly) &&
!(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
@@ -1095,12 +1106,7 @@ map_grant_ref(
}
}
- if ( op->flags & GNTMAP_device_map )
- act->pin += (op->flags & GNTMAP_readonly) ?
- GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( op->flags & GNTMAP_host_map )
- act->pin += (op->flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ act->pin += pin_incr;
mfn = act->mfn;
@@ -1275,13 +1281,7 @@ map_grant_ref(
grant_read_lock(rgt);
act = active_entry_acquire(rgt, op->ref);
-
- if ( op->flags & GNTMAP_device_map )
- act->pin -= (op->flags & GNTMAP_readonly) ?
- GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( op->flags & GNTMAP_host_map )
- act->pin -= (op->flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ act->pin -= pin_incr;
unlock_out_clear:
if ( !(op->flags & GNTMAP_readonly) &&
@@ -2516,6 +2516,7 @@ acquire_grant_for_copy(
uint16_t trans_length;
bool is_sub_page;
s16 rc = GNTST_okay;
+ unsigned int pin_incr = readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc;
unsigned int clear_flags = 0;
*page = NULL;
@@ -2530,6 +2531,14 @@ acquire_grant_for_copy(
shah = shared_entry_header(rgt, gref);
act = active_entry_acquire(rgt, gref);
+ /* If already pinned, check the active domid and avoid refcnt overflow. */
+ if ( act->pin &&
+ ((act->domid != ldom) ||
+ (act->pin & (pin_incr << (GNTPIN_cntr_width - 1)))) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Bad domain (%d != %d), or risk of counter overflow %08x\n",
+ act->domid, ldom, act->pin);
+
if ( evaluate_nospec(rgt->gt_version == 1) )
{
sha2 = NULL;
@@ -2541,12 +2550,6 @@ acquire_grant_for_copy(
status = &status_entry(rgt, gref);
}
- /* If already pinned, check the active domid and avoid refcnt overflow. */
- if ( act->pin && ((act->domid != ldom) || (act->pin & 0x80808080U) != 0) )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Bad domain (%d != %d), or risk of counter overflow %08x\n",
- act->domid, ldom, act->pin);
-
old_pin = act->pin;
if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
{
@@ -2728,7 +2731,7 @@ acquire_grant_for_copy(
}
}
- act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ act->pin += pin_incr;
*page_off = act->start;
*length = act->length;
On 14/01/2021 15:23, Jan Beulich wrote:
> It's at least odd to check counters which aren't going to be
> incremented. And it's also not helpful to use open-coded literal numbers
> in these checks.
>
> Calculate the increment values first and derive from them the mask to
> use in the checks.
>
> Also move the pin count checks ahead of the calculation of the status
> (and for copy also sha2) pointers: They're not needed in the failure
> cases, and this way the compiler may also have an easier time keeping
> the variables at least transiently in registers for the subsequent uses.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -323,22 +323,25 @@ shared_entry_header(struct grant_table *
> /* Active grant entry - used for shadowing GTF_permit_access grants. */
> struct active_grant_entry {
> uint32_t pin; /* Reference count information: */
> + /* Width of the individual counter fields. */
> +#define GNTPIN_cntr_width 8
> +#define GNTPIN_cntr_mask ((1U << GNTPIN_cntr_width) - 1)
> /* Count of writable host-CPU mappings. */
> #define GNTPIN_hstw_shift 0
> #define GNTPIN_hstw_inc (1U << GNTPIN_hstw_shift)
> -#define GNTPIN_hstw_mask (0xFFU << GNTPIN_hstw_shift)
> +#define GNTPIN_hstw_mask (GNTPIN_cntr_mask << GNTPIN_hstw_shift)
> /* Count of read-only host-CPU mappings. */
> -#define GNTPIN_hstr_shift 8
> +#define GNTPIN_hstr_shift (GNTPIN_hstw_shift + GNTPIN_cntr_width)
While this patch is by-and-large an improvement, it unfortunately
further hides how the pin field works, which is already clear-as-mud.
I'd recommend replacing the "Reference count information:" comment with:
/*
* 4x byte-wide reference counts, for {host,device}{read,write}
* mappings, implemented as a single 32bit presumably to optimise
* checking for any reference.
*/
uint32_t pin;
I still can't make up my mind as to whether this is a sensible
optimisation. It is borderline obfuscated code, due to having a totally
undocumented and weird data arrangement.
> @@ -1052,19 +1063,19 @@ map_grant_ref(
> shah = shared_entry_header(rgt, ref);
> act = active_entry_acquire(rgt, ref);
>
> - /* Make sure we do not access memory speculatively */
> - status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags
> - : &status_entry(rgt, ref);
> -
> /* If already pinned, check the active domid and avoid refcnt overflow. */
> if ( act->pin &&
> ((act->domid != ld->domain_id) ||
> - (act->pin & 0x80808080U) != 0 ||
> + (act->pin & (pin_incr << (GNTPIN_cntr_width - 1))) ||
This, I'm afraid, is not an improvement. What we want is:
#define GNTPIN_overflow_mask 0x80808080U
The reason for checking all at once is defence in depth (not strictly
necessary, but also not a problem), and in this specific example, using
a constant results in better code because pin_incr doesn't need
unspilling from stack and manipulated.
If you're happy with both of these suggestions, then Reviewed-by: Andrew
Cooper <andrew.cooper3@citrix.com> to avoid a repost.
~Andrew
On 15.01.2021 14:09, Andrew Cooper wrote:
> On 14/01/2021 15:23, Jan Beulich wrote:
>> --- a/xen/common/grant_table.c
>> +++ b/xen/common/grant_table.c
>> @@ -323,22 +323,25 @@ shared_entry_header(struct grant_table *
>> /* Active grant entry - used for shadowing GTF_permit_access grants. */
>> struct active_grant_entry {
>> uint32_t pin; /* Reference count information: */
>> + /* Width of the individual counter fields. */
>> +#define GNTPIN_cntr_width 8
>> +#define GNTPIN_cntr_mask ((1U << GNTPIN_cntr_width) - 1)
>> /* Count of writable host-CPU mappings. */
>> #define GNTPIN_hstw_shift 0
>> #define GNTPIN_hstw_inc (1U << GNTPIN_hstw_shift)
>> -#define GNTPIN_hstw_mask (0xFFU << GNTPIN_hstw_shift)
>> +#define GNTPIN_hstw_mask (GNTPIN_cntr_mask << GNTPIN_hstw_shift)
>> /* Count of read-only host-CPU mappings. */
>> -#define GNTPIN_hstr_shift 8
>> +#define GNTPIN_hstr_shift (GNTPIN_hstw_shift + GNTPIN_cntr_width)
>
> While this patch is by-and-large an improvement, it unfortunately
> further hides how the pin field works, which is already clear-as-mud.
>
> I'd recommend replacing the "Reference count information:" comment with:
>
> /*
> * 4x byte-wide reference counts, for {host,device}{read,write}
> * mappings, implemented as a single 32bit presumably to optimise
> * checking for any reference.
> */
> uint32_t pin;
Sure, added.
>> @@ -1052,19 +1063,19 @@ map_grant_ref(
>> shah = shared_entry_header(rgt, ref);
>> act = active_entry_acquire(rgt, ref);
>>
>> - /* Make sure we do not access memory speculatively */
>> - status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags
>> - : &status_entry(rgt, ref);
>> -
>> /* If already pinned, check the active domid and avoid refcnt overflow. */
>> if ( act->pin &&
>> ((act->domid != ld->domain_id) ||
>> - (act->pin & 0x80808080U) != 0 ||
>> + (act->pin & (pin_incr << (GNTPIN_cntr_width - 1))) ||
>
> This, I'm afraid, is not an improvement. What we want is:
>
> #define GNTPIN_overflow_mask 0x80808080U
>
> The reason for checking all at once is defence in depth (not strictly
> necessary, but also not a problem),
How is this not a problem? There is absolutely no reason to
reject a request just because some unrelated field may be
about to overflow. In fact I now think that I didn't even
leverage the full potential - the "act->pin != old_pin" check
could also be relaxed this way, I think. Just that it sits on
a path we probably don't really care about very much.
> and in this specific example, using
> a constant results in better code because pin_incr doesn't need
> unspilling from stack and manipulated.
That's, I think, an acceptable price to pay for getting the
checking correct.
> If you're happy with both of these suggestions, then Reviewed-by: Andrew
> Cooper <andrew.cooper3@citrix.com> to avoid a repost.
Thanks, but as per above - not yet. Plus if I made this 2nd
suggested change, the patch would change significantly (the
new local variable would then become pointless in at least
acquire_grant_for_copy()), which I think would better involve
re-posting then.
Jan
On 15/01/2021 13:26, Jan Beulich wrote: > On 15.01.2021 14:09, Andrew Cooper wrote: >> On 14/01/2021 15:23, Jan Beulich wrote: >>> @@ -1052,19 +1063,19 @@ map_grant_ref( >>> shah = shared_entry_header(rgt, ref); >>> act = active_entry_acquire(rgt, ref); >>> >>> - /* Make sure we do not access memory speculatively */ >>> - status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags >>> - : &status_entry(rgt, ref); >>> - >>> /* If already pinned, check the active domid and avoid refcnt overflow. */ >>> if ( act->pin && >>> ((act->domid != ld->domain_id) || >>> - (act->pin & 0x80808080U) != 0 || >>> + (act->pin & (pin_incr << (GNTPIN_cntr_width - 1))) || >> This, I'm afraid, is not an improvement. What we want is: >> >> #define GNTPIN_overflow_mask 0x80808080U >> >> The reason for checking all at once is defence in depth (not strictly >> necessary, but also not a problem), > How is this not a problem? There is absolutely no reason to > reject a request just because some unrelated field may be > about to overflow. In fact I now think that I didn't even > leverage the full potential - the "act->pin != old_pin" check > could also be relaxed this way, I think. Just that it sits on > a path we probably don't really care about very much. Hmm - I see your point. I'd missed the fact that a previous operation could leave this timebomb waiting for us. (Probably wants a note to this effect in the commit message). However we go about fixing it, "pin_incr << (GNTPIN_cntr_width - 1)" is too obscure IMO. Perhaps all we need is a #define GNTPIN_overflow_mask(x) ((x) << (GNTPIN_cntr_width - 1)) but it does occur to me that this logic only works to being with when pin_incr is strictly 1 in the relevant bytes. ~Andrew
On 15.01.2021 14:35, Andrew Cooper wrote:
> On 15/01/2021 13:26, Jan Beulich wrote:
>> On 15.01.2021 14:09, Andrew Cooper wrote:
>>> On 14/01/2021 15:23, Jan Beulich wrote:
>>>> @@ -1052,19 +1063,19 @@ map_grant_ref(
>>>> shah = shared_entry_header(rgt, ref);
>>>> act = active_entry_acquire(rgt, ref);
>>>>
>>>> - /* Make sure we do not access memory speculatively */
>>>> - status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags
>>>> - : &status_entry(rgt, ref);
>>>> -
>>>> /* If already pinned, check the active domid and avoid refcnt overflow. */
>>>> if ( act->pin &&
>>>> ((act->domid != ld->domain_id) ||
>>>> - (act->pin & 0x80808080U) != 0 ||
>>>> + (act->pin & (pin_incr << (GNTPIN_cntr_width - 1))) ||
>>> This, I'm afraid, is not an improvement. What we want is:
>>>
>>> #define GNTPIN_overflow_mask 0x80808080U
>>>
>>> The reason for checking all at once is defence in depth (not strictly
>>> necessary, but also not a problem),
>> How is this not a problem? There is absolutely no reason to
>> reject a request just because some unrelated field may be
>> about to overflow. In fact I now think that I didn't even
>> leverage the full potential - the "act->pin != old_pin" check
>> could also be relaxed this way, I think. Just that it sits on
>> a path we probably don't really care about very much.
>
> Hmm - I see your point. I'd missed the fact that a previous operation
> could leave this timebomb waiting for us. (Probably wants a note to
> this effect in the commit message).
I've added half a sentence.
> However we go about fixing it, "pin_incr << (GNTPIN_cntr_width - 1)" is
> too obscure IMO. Perhaps all we need is a
>
> #define GNTPIN_overflow_mask(x) ((x) << (GNTPIN_cntr_width - 1))
>
> but it does occur to me that this logic only works to being with when
> pin_incr is strictly 1 in the relevant bytes.
Perhaps
#define GNTPIN_overflow_mask(x) ({ \
ASSERT(!((x) & ~(GNTPIN_hstw_inc | GNTPIN_hstr_inc | \
GNTPIN_devw_inc | GNTPIN_devr_inc))); \
(x) << (GNTPIN_cntr_width - 1); \
})
? And maybe name the whole thing e.g. GNTPIN_incr2oflow_mask()
to clarify the input is an increment (which I personally take
to mean consisting of only values of 1, but I realize views
may vary)?
Jan
On 15/01/2021 14:21, Jan Beulich wrote:
> On 15.01.2021 14:35, Andrew Cooper wrote:
>> On 15/01/2021 13:26, Jan Beulich wrote:
>>> On 15.01.2021 14:09, Andrew Cooper wrote:
>>>> On 14/01/2021 15:23, Jan Beulich wrote:
>>>>> @@ -1052,19 +1063,19 @@ map_grant_ref(
>>>>> shah = shared_entry_header(rgt, ref);
>>>>> act = active_entry_acquire(rgt, ref);
>>>>>
>>>>> - /* Make sure we do not access memory speculatively */
>>>>> - status = evaluate_nospec(rgt->gt_version == 1) ? &shah->flags
>>>>> - : &status_entry(rgt, ref);
>>>>> -
>>>>> /* If already pinned, check the active domid and avoid refcnt overflow. */
>>>>> if ( act->pin &&
>>>>> ((act->domid != ld->domain_id) ||
>>>>> - (act->pin & 0x80808080U) != 0 ||
>>>>> + (act->pin & (pin_incr << (GNTPIN_cntr_width - 1))) ||
>>>> This, I'm afraid, is not an improvement. What we want is:
>>>>
>>>> #define GNTPIN_overflow_mask 0x80808080U
>>>>
>>>> The reason for checking all at once is defence in depth (not strictly
>>>> necessary, but also not a problem),
>>> How is this not a problem? There is absolutely no reason to
>>> reject a request just because some unrelated field may be
>>> about to overflow. In fact I now think that I didn't even
>>> leverage the full potential - the "act->pin != old_pin" check
>>> could also be relaxed this way, I think. Just that it sits on
>>> a path we probably don't really care about very much.
>> Hmm - I see your point. I'd missed the fact that a previous operation
>> could leave this timebomb waiting for us. (Probably wants a note to
>> this effect in the commit message).
> I've added half a sentence.
>
>> However we go about fixing it, "pin_incr << (GNTPIN_cntr_width - 1)" is
>> too obscure IMO. Perhaps all we need is a
>>
>> #define GNTPIN_overflow_mask(x) ((x) << (GNTPIN_cntr_width - 1))
>>
>> but it does occur to me that this logic only works to being with when
>> pin_incr is strictly 1 in the relevant bytes.
> Perhaps
>
> #define GNTPIN_overflow_mask(x) ({ \
> ASSERT(!((x) & ~(GNTPIN_hstw_inc | GNTPIN_hstr_inc | \
> GNTPIN_devw_inc | GNTPIN_devr_inc))); \
> (x) << (GNTPIN_cntr_width - 1); \
> })
>
> ? And maybe name the whole thing e.g. GNTPIN_incr2oflow_mask()
> to clarify the input is an increment (which I personally take
> to mean consisting of only values of 1, but I realize views
> may vary)?
Yeah - lets go with this. Its a massive improvement on the current code.
~Andrew
© 2016 - 2026 Red Hat, Inc.