Setting cacheability flags that are not ones specified by Xen is a bug
in the guest. By default, return -EINVAL if a guests attempts to do
this. The invalid-cacheability= Xen command-line flag allows the
administrator to allow such attempts or to produce
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
---
Changes since v5:
- Make parameters static and __ro_after_init.
- Replace boolean parameter allow_invalid_cacheability with string
parameter invalid-cacheability.
- Move parameter definitions to near where they are used.
- Add documentation.
Changes since v4:
- Remove pointless BUILD_BUG_ON().
- Add comment explaining why an exception is being injected.
Changes since v3:
- Add Andrew Cooper’s Suggested-by
---
docs/misc/xen-command-line.pandoc | 11 ++++++
xen/arch/x86/mm.c | 60 ++++++++++++++++++++++++++++++-
2 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 424b12cfb27d6ade2ec63eacb8afe5df82465451..0230a7bc17cbd4362a42ea64cea695f31f5e0f86 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1417,6 +1417,17 @@ detection of systems known to misbehave upon accesses to that port.
### idle_latency_factor (x86)
> `= <integer>`
+### invalid-cacheability (x86)
+> `= allow | deny | trap`
+
+> Default: `deny` in release builds, otherwise `trap`
+
+Specify what happens when a PV guest tries to use one of the reserved entries in
+the PAT. `deny` causes the attempt to be rejected with -EINVAL, `allow` allows
+the attempt, and `trap` causes a general protection fault to be raised.
+Currently, the reserved entries are marked as uncacheable in Xen's PAT, but this
+will change if new memory types are added, so guests must not rely on it.
+
### ioapic_ack (x86)
> `= old | new`
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 65ba0f58ed8c26ac0343528303851739981c03bd..bacfb776d688f68dcbf79d83723fff329b75fd18 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1324,6 +1324,37 @@ static int put_page_from_l4e(l4_pgentry_t l4e, mfn_t l4mfn, unsigned int flags)
return put_pt_page(l4e_get_page(l4e), mfn_to_page(l4mfn), flags);
}
+enum {
+ INVALID_CACHEABILITY_ALLOW,
+ INVALID_CACHEABILITY_DENY,
+ INVALID_CACHEABILITY_TRAP,
+};
+
+#ifdef NDEBUG
+#define INVALID_CACHEABILITY_DEFAULT INVALID_CACHEABILITY_DENY
+#else
+#define INVALID_CACHEABILITY_DEFAULT INVALID_CACHEABILITY_TRAP
+#endif
+
+static __ro_after_init uint8_t invalid_cacheability =
+ INVALID_CACHEABILITY_DEFAULT;
+
+static int __init cf_check set_invalid_cacheability(const char *str)
+{
+ if (strcmp("allow", str) == 0)
+ invalid_cacheability = INVALID_CACHEABILITY_ALLOW;
+ else if (strcmp("deny", str) == 0)
+ invalid_cacheability = INVALID_CACHEABILITY_DENY;
+ else if (strcmp("trap", str) == 0)
+ invalid_cacheability = INVALID_CACHEABILITY_TRAP;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+custom_param("invalid-cacheability", set_invalid_cacheability);
+
static int promote_l1_table(struct page_info *page)
{
struct domain *d = page_get_owner(page);
@@ -1343,7 +1374,34 @@ static int promote_l1_table(struct page_info *page)
}
else
{
- switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
+ l1_pgentry_t l1e = pl1e[i];
+
+ if ( invalid_cacheability != INVALID_CACHEABILITY_ALLOW )
+ {
+ switch ( l1e.l1 & PAGE_CACHE_ATTRS )
+ {
+ case _PAGE_WB:
+ case _PAGE_UC:
+ case _PAGE_UCM:
+ case _PAGE_WC:
+ case _PAGE_WT:
+ case _PAGE_WP:
+ break;
+ default:
+ /*
+ * If we get here, a PV guest tried to use one of the
+ * reserved values in Xen's PAT. This indicates a bug
+ * in the guest. If requested by the user, inject #GP
+ * to cause the guest to log a stack trace.
+ */
+ if ( invalid_cacheability == INVALID_CACHEABILITY_TRAP )
+ pv_inject_hw_exception(TRAP_gp_fault, 0);
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
+ switch ( ret = get_page_from_l1e(l1e, d, d) )
{
default:
goto fail;
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
On 22/12/2022 10:31 pm, Demi Marie Obenour wrote:
> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
> index 424b12cfb27d6ade2ec63eacb8afe5df82465451..0230a7bc17cbd4362a42ea64cea695f31f5e0f86 100644
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -1417,6 +1417,17 @@ detection of systems known to misbehave upon accesses to that port.
> ### idle_latency_factor (x86)
> > `= <integer>`
>
> +### invalid-cacheability (x86)
> +> `= allow | deny | trap`
> +
> +> Default: `deny` in release builds, otherwise `trap`
> +
> +Specify what happens when a PV guest tries to use one of the reserved entries in
> +the PAT. `deny` causes the attempt to be rejected with -EINVAL, `allow` allows
> +the attempt, and `trap` causes a general protection fault to be raised.
> +Currently, the reserved entries are marked as uncacheable in Xen's PAT, but this
> +will change if new memory types are added, so guests must not rely on it.
> +
This wants to be scoped under "pv", and not a top-level option. Current
parsing is at the top of xen/arch/x86/pv/domain.c
How about `pv={no-}oob-pat`, and parse it into the opt_pv_oob_pat boolean ?
There really is no need to distinguish between deny and trap. IMO,
injecting #GP unilaterally is fine (to a guest expecting the hypercall
to succeed, -EINVAL vs #GP makes no difference, but #GP is far more
useful to a human trying to debug issues here), but I could be talked
into putting that behind a CONFIG_DEBUG if other feel strongly.
> @@ -1343,7 +1374,34 @@ static int promote_l1_table(struct page_info *page)
> }
> else
> {
> - switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
> + l1_pgentry_t l1e = pl1e[i];
> +
> + if ( invalid_cacheability != INVALID_CACHEABILITY_ALLOW )
> + {
> + switch ( l1e.l1 & PAGE_CACHE_ATTRS )
> + {
> + case _PAGE_WB:
> + case _PAGE_UC:
> + case _PAGE_UCM:
> + case _PAGE_WC:
> + case _PAGE_WT:
> + case _PAGE_WP:
> + break;
> + default:
> + /*
> + * If we get here, a PV guest tried to use one of the
> + * reserved values in Xen's PAT. This indicates a bug
> + * in the guest. If requested by the user, inject #GP
> + * to cause the guest to log a stack trace.
> + */
> + if ( invalid_cacheability == INVALID_CACHEABILITY_TRAP )
> + pv_inject_hw_exception(TRAP_gp_fault, 0);
> + ret = -EINVAL;
> + goto fail;
> + }
> + }
This will catch cases where the guest writes a full pagetable, then
promotes it, but won't catch cases where the guest modifies an
individual entry with mmu_update/etc.
The logic needs to be in get_page_from_l1e() to get applied uniformly to
all PTE modifications.
Right now, the l1_disallow_mask() check near the start hides the "can
you use a nonzero cacheattr" check. If I ever get around to cleaning up
my post-XSA-402 series, I have a load of modifications to this.
But this could be something like this:
if ( opt_pv_oob_pat && (l1f & PAGE_CACHE_ATTRS) > _PAGE_WP )
{
// #GP
return -EINVAL;
}
fairly early on.
It occurs to me that this check is only applicable when we're using the
Xen ABI APT, and seeing as we've talked about possibly making patch 5 a
Kconfig option, that may want accounting here. (Perhaps simply making
opt_pb_oob_pat be false in a !XEN_PAT build.)
~Andrew
On 05.01.2023 21:28, Andrew Cooper wrote:
> On 22/12/2022 10:31 pm, Demi Marie Obenour wrote:
>> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
>> index 424b12cfb27d6ade2ec63eacb8afe5df82465451..0230a7bc17cbd4362a42ea64cea695f31f5e0f86 100644
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -1417,6 +1417,17 @@ detection of systems known to misbehave upon accesses to that port.
>> ### idle_latency_factor (x86)
>> > `= <integer>`
>>
>> +### invalid-cacheability (x86)
>> +> `= allow | deny | trap`
>> +
>> +> Default: `deny` in release builds, otherwise `trap`
>> +
>> +Specify what happens when a PV guest tries to use one of the reserved entries in
>> +the PAT. `deny` causes the attempt to be rejected with -EINVAL, `allow` allows
>> +the attempt, and `trap` causes a general protection fault to be raised.
>> +Currently, the reserved entries are marked as uncacheable in Xen's PAT, but this
>> +will change if new memory types are added, so guests must not rely on it.
>> +
>
> This wants to be scoped under "pv", and not a top-level option. Current
> parsing is at the top of xen/arch/x86/pv/domain.c
>
> How about `pv={no-}oob-pat`, and parse it into the opt_pv_oob_pat boolean ?
>
> There really is no need to distinguish between deny and trap. IMO,
> injecting #GP unilaterally is fine (to a guest expecting the hypercall
> to succeed, -EINVAL vs #GP makes no difference, but #GP is far more
> useful to a human trying to debug issues here), but I could be talked
> into putting that behind a CONFIG_DEBUG if other feel strongly.
What is or is not useful to guests is guesswork. They might be "handling"
failure with BUG(), in which case they'd still see a backtrace. So to me,
as previously said, injecting #GP in the case here can only reasonably be
a debugging aid (and hence be dependent upon DEBUG).
Jan
On Thu, Jan 05, 2023 at 08:28:26PM +0000, Andrew Cooper wrote:
> On 22/12/2022 10:31 pm, Demi Marie Obenour wrote:
> > diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
> > index 424b12cfb27d6ade2ec63eacb8afe5df82465451..0230a7bc17cbd4362a42ea64cea695f31f5e0f86 100644
> > --- a/docs/misc/xen-command-line.pandoc
> > +++ b/docs/misc/xen-command-line.pandoc
> > @@ -1417,6 +1417,17 @@ detection of systems known to misbehave upon accesses to that port.
> > ### idle_latency_factor (x86)
> > > `= <integer>`
> >
> > +### invalid-cacheability (x86)
> > +> `= allow | deny | trap`
> > +
> > +> Default: `deny` in release builds, otherwise `trap`
> > +
> > +Specify what happens when a PV guest tries to use one of the reserved entries in
> > +the PAT. `deny` causes the attempt to be rejected with -EINVAL, `allow` allows
> > +the attempt, and `trap` causes a general protection fault to be raised.
> > +Currently, the reserved entries are marked as uncacheable in Xen's PAT, but this
> > +will change if new memory types are added, so guests must not rely on it.
> > +
>
> This wants to be scoped under "pv", and not a top-level option. Current
> parsing is at the top of xen/arch/x86/pv/domain.c
>
> How about `pv={no-}oob-pat`, and parse it into the opt_pv_oob_pat boolean ?
Works for me, though I will use ‘invalid’ instead of ‘oob’ as valid PAT
entries might not be contiguous.
> There really is no need to distinguish between deny and trap. IMO,
> injecting #GP unilaterally is fine (to a guest expecting the hypercall
> to succeed, -EINVAL vs #GP makes no difference, but #GP is far more
> useful to a human trying to debug issues here), but I could be talked
> into putting that behind a CONFIG_DEBUG if other feel strongly.
Marek, thoughts?
> > @@ -1343,7 +1374,34 @@ static int promote_l1_table(struct page_info *page)
> > }
> > else
> > {
> > - switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
> > + l1_pgentry_t l1e = pl1e[i];
> > +
> > + if ( invalid_cacheability != INVALID_CACHEABILITY_ALLOW )
> > + {
> > + switch ( l1e.l1 & PAGE_CACHE_ATTRS )
> > + {
> > + case _PAGE_WB:
> > + case _PAGE_UC:
> > + case _PAGE_UCM:
> > + case _PAGE_WC:
> > + case _PAGE_WT:
> > + case _PAGE_WP:
> > + break;
> > + default:
> > + /*
> > + * If we get here, a PV guest tried to use one of the
> > + * reserved values in Xen's PAT. This indicates a bug
> > + * in the guest. If requested by the user, inject #GP
> > + * to cause the guest to log a stack trace.
> > + */
> > + if ( invalid_cacheability == INVALID_CACHEABILITY_TRAP )
> > + pv_inject_hw_exception(TRAP_gp_fault, 0);
> > + ret = -EINVAL;
> > + goto fail;
> > + }
> > + }
>
> This will catch cases where the guest writes a full pagetable, then
> promotes it, but won't catch cases where the guest modifies an
> individual entry with mmu_update/etc.
>
> The logic needs to be in get_page_from_l1e() to get applied uniformly to
> all PTE modifications.
I will move it there, and also update Qubes OS’s patchset.
> Right now, the l1_disallow_mask() check near the start hides the "can
> you use a nonzero cacheattr" check. If I ever get around to cleaning up
> my post-XSA-402 series, I have a load of modifications to this.
I came up with some major cleanups too.
> But this could be something like this:
>
> if ( opt_pv_oob_pat && (l1f & PAGE_CACHE_ATTRS) > _PAGE_WP )
> {
> // #GP
> return -EINVAL;
> }
>
> fairly early on.
>
> It occurs to me that this check is only applicable when we're using the
> Xen ABI APT, and seeing as we've talked about possibly making patch 5 a
> Kconfig option, that may want accounting here. (Perhaps simply making
> opt_pb_oob_pat be false in a !XEN_PAT build.)
It’s actually applicable even with other PATs. While Marek and I were
tracking down an Intel iGPU cache coherency problem, Marek used it to
verify that PAT entries that we thought were not being used were in fact
unused. This allowed proving that the behavior of the GPU was impacted
by changes to PAT entries the hardware should not even be looking at,
and therefore that the hardware itself must be buggy.
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
On Thu, Jan 05, 2023 at 09:00:03PM -0500, Demi Marie Obenour wrote:
> On Thu, Jan 05, 2023 at 08:28:26PM +0000, Andrew Cooper wrote:
> > On 22/12/2022 10:31 pm, Demi Marie Obenour wrote:
> > > diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
> > > index 424b12cfb27d6ade2ec63eacb8afe5df82465451..0230a7bc17cbd4362a42ea64cea695f31f5e0f86 100644
> > > --- a/docs/misc/xen-command-line.pandoc
> > > +++ b/docs/misc/xen-command-line.pandoc
> > > @@ -1417,6 +1417,17 @@ detection of systems known to misbehave upon accesses to that port.
> > > ### idle_latency_factor (x86)
> > > > `= <integer>`
> > >
> > > +### invalid-cacheability (x86)
> > > +> `= allow | deny | trap`
> > > +
> > > +> Default: `deny` in release builds, otherwise `trap`
> > > +
> > > +Specify what happens when a PV guest tries to use one of the reserved entries in
> > > +the PAT. `deny` causes the attempt to be rejected with -EINVAL, `allow` allows
> > > +the attempt, and `trap` causes a general protection fault to be raised.
> > > +Currently, the reserved entries are marked as uncacheable in Xen's PAT, but this
> > > +will change if new memory types are added, so guests must not rely on it.
> > > +
> >
> > This wants to be scoped under "pv", and not a top-level option. Current
> > parsing is at the top of xen/arch/x86/pv/domain.c
> >
> > How about `pv={no-}oob-pat`, and parse it into the opt_pv_oob_pat boolean ?
>
> Works for me, though I will use ‘invalid’ instead of ‘oob’ as valid PAT
> entries might not be contiguous.
If we're talking about alternative PAT settings, I'm not sure if they
can be called "invalid". With the default Xen's choice of PAT, top two
entries are documented as reserved (in xen.h) and only that makes them
forbidden to use. But with alternative settings, it's only behavior of
Linux parsing that prefers using lower options. When breaking contract
set in xen.h, "reserved" entries stop being reserved too.
So, _if_ that option would be applicable alternative PAT choice, it's
only useful for debugging Linux specifically (assuming Linux won't
change its approach to choosing entries - which I think it's allowed to
do).
> > There really is no need to distinguish between deny and trap. IMO,
> > injecting #GP unilaterally is fine (to a guest expecting the hypercall
> > to succeed, -EINVAL vs #GP makes no difference, but #GP is far more
> > useful to a human trying to debug issues here), but I could be talked
> > into putting that behind a CONFIG_DEBUG if other feel strongly.
>
> Marek, thoughts?
With Xen's default PAT, #GP may be useful indeed, but it must come with
a message why it was injected.
> > > @@ -1343,7 +1374,34 @@ static int promote_l1_table(struct page_info *page)
> > > }
> > > else
> > > {
> > > - switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
> > > + l1_pgentry_t l1e = pl1e[i];
> > > +
> > > + if ( invalid_cacheability != INVALID_CACHEABILITY_ALLOW )
> > > + {
> > > + switch ( l1e.l1 & PAGE_CACHE_ATTRS )
> > > + {
> > > + case _PAGE_WB:
> > > + case _PAGE_UC:
> > > + case _PAGE_UCM:
> > > + case _PAGE_WC:
> > > + case _PAGE_WT:
> > > + case _PAGE_WP:
> > > + break;
> > > + default:
> > > + /*
> > > + * If we get here, a PV guest tried to use one of the
> > > + * reserved values in Xen's PAT. This indicates a bug
> > > + * in the guest. If requested by the user, inject #GP
> > > + * to cause the guest to log a stack trace.
> > > + */
> > > + if ( invalid_cacheability == INVALID_CACHEABILITY_TRAP )
> > > + pv_inject_hw_exception(TRAP_gp_fault, 0);
> > > + ret = -EINVAL;
> > > + goto fail;
> > > + }
> > > + }
> >
> > This will catch cases where the guest writes a full pagetable, then
> > promotes it, but won't catch cases where the guest modifies an
> > individual entry with mmu_update/etc.
> >
> > The logic needs to be in get_page_from_l1e() to get applied uniformly to
> > all PTE modifications.
>
> I will move it there, and also update Qubes OS’s patchset.
>
> > Right now, the l1_disallow_mask() check near the start hides the "can
> > you use a nonzero cacheattr" check. If I ever get around to cleaning up
> > my post-XSA-402 series, I have a load of modifications to this.
>
> I came up with some major cleanups too.
>
> > But this could be something like this:
> >
> > if ( opt_pv_oob_pat && (l1f & PAGE_CACHE_ATTRS) > _PAGE_WP )
> > {
> > // #GP
> > return -EINVAL;
> > }
> >
> > fairly early on.
> >
> > It occurs to me that this check is only applicable when we're using the
> > Xen ABI APT, and seeing as we've talked about possibly making patch 5 a
> > Kconfig option, that may want accounting here. (Perhaps simply making
> > opt_pb_oob_pat be false in a !XEN_PAT build.)
>
> It’s actually applicable even with other PATs. While Marek and I were
> tracking down an Intel iGPU cache coherency problem, Marek used it to
> verify that PAT entries that we thought were not being used were in fact
> unused. This allowed proving that the behavior of the GPU was impacted
> by changes to PAT entries the hardware should not even be looking at,
> and therefore that the hardware itself must be buggy.
In fact, I did that via WARN() on the Linux side, to _not_ have guest
killed in this case, to potentially collect more info. As said above,
with alternative PAT settings, the contract which entries are "valid"
isn't there anymore, so punishing guest for using them isn't
appropriate. It could still be useful feature for debugging Linux (and
it feels, we'll need this feature for some time...). So, with !XEN_PAT
it should be at least disabled by default, or maybe even hidden behind
CONFIG_DEBUG.
--
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab
On Fri, Jan 06, 2023 at 03:30:01AM +0100, Marek Marczykowski-Górecki wrote:
> On Thu, Jan 05, 2023 at 09:00:03PM -0500, Demi Marie Obenour wrote:
> > On Thu, Jan 05, 2023 at 08:28:26PM +0000, Andrew Cooper wrote:
> > > On 22/12/2022 10:31 pm, Demi Marie Obenour wrote:
> > > > diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
> > > > index 424b12cfb27d6ade2ec63eacb8afe5df82465451..0230a7bc17cbd4362a42ea64cea695f31f5e0f86 100644
> > > > --- a/docs/misc/xen-command-line.pandoc
> > > > +++ b/docs/misc/xen-command-line.pandoc
> > > > @@ -1417,6 +1417,17 @@ detection of systems known to misbehave upon accesses to that port.
> > > > ### idle_latency_factor (x86)
> > > > > `= <integer>`
> > > >
> > > > +### invalid-cacheability (x86)
> > > > +> `= allow | deny | trap`
> > > > +
> > > > +> Default: `deny` in release builds, otherwise `trap`
> > > > +
> > > > +Specify what happens when a PV guest tries to use one of the reserved entries in
> > > > +the PAT. `deny` causes the attempt to be rejected with -EINVAL, `allow` allows
> > > > +the attempt, and `trap` causes a general protection fault to be raised.
> > > > +Currently, the reserved entries are marked as uncacheable in Xen's PAT, but this
> > > > +will change if new memory types are added, so guests must not rely on it.
> > > > +
> > >
> > > This wants to be scoped under "pv", and not a top-level option. Current
> > > parsing is at the top of xen/arch/x86/pv/domain.c
> > >
> > > How about `pv={no-}oob-pat`, and parse it into the opt_pv_oob_pat boolean ?
> >
> > Works for me, though I will use ‘invalid’ instead of ‘oob’ as valid PAT
> > entries might not be contiguous.
>
> If we're talking about alternative PAT settings, I'm not sure if they
> can be called "invalid". With the default Xen's choice of PAT, top two
> entries are documented as reserved (in xen.h) and only that makes them
> forbidden to use. But with alternative settings, it's only behavior of
> Linux parsing that prefers using lower options. When breaking contract
> set in xen.h, "reserved" entries stop being reserved too.
That makes sense.
> So, _if_ that option would be applicable alternative PAT choice, it's
> only useful for debugging Linux specifically (assuming Linux won't
> change its approach to choosing entries - which I think it's allowed to
> do).
Point taken.
> > > There really is no need to distinguish between deny and trap. IMO,
> > > injecting #GP unilaterally is fine (to a guest expecting the hypercall
> > > to succeed, -EINVAL vs #GP makes no difference, but #GP is far more
> > > useful to a human trying to debug issues here), but I could be talked
> > > into putting that behind a CONFIG_DEBUG if other feel strongly.
> >
> > Marek, thoughts?
>
> With Xen's default PAT, #GP may be useful indeed, but it must come with
> a message why it was injected.
In xl dmesg?
> > > > @@ -1343,7 +1374,34 @@ static int promote_l1_table(struct page_info *page)
> > > > }
> > > > else
> > > > {
> > > > - switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
> > > > + l1_pgentry_t l1e = pl1e[i];
> > > > +
> > > > + if ( invalid_cacheability != INVALID_CACHEABILITY_ALLOW )
> > > > + {
> > > > + switch ( l1e.l1 & PAGE_CACHE_ATTRS )
> > > > + {
> > > > + case _PAGE_WB:
> > > > + case _PAGE_UC:
> > > > + case _PAGE_UCM:
> > > > + case _PAGE_WC:
> > > > + case _PAGE_WT:
> > > > + case _PAGE_WP:
> > > > + break;
> > > > + default:
> > > > + /*
> > > > + * If we get here, a PV guest tried to use one of the
> > > > + * reserved values in Xen's PAT. This indicates a bug
> > > > + * in the guest. If requested by the user, inject #GP
> > > > + * to cause the guest to log a stack trace.
> > > > + */
> > > > + if ( invalid_cacheability == INVALID_CACHEABILITY_TRAP )
> > > > + pv_inject_hw_exception(TRAP_gp_fault, 0);
> > > > + ret = -EINVAL;
> > > > + goto fail;
> > > > + }
> > > > + }
> > >
> > > This will catch cases where the guest writes a full pagetable, then
> > > promotes it, but won't catch cases where the guest modifies an
> > > individual entry with mmu_update/etc.
> > >
> > > The logic needs to be in get_page_from_l1e() to get applied uniformly to
> > > all PTE modifications.
> >
> > I will move it there, and also update Qubes OS’s patchset.
> >
> > > Right now, the l1_disallow_mask() check near the start hides the "can
> > > you use a nonzero cacheattr" check. If I ever get around to cleaning up
> > > my post-XSA-402 series, I have a load of modifications to this.
> >
> > I came up with some major cleanups too.
> >
> > > But this could be something like this:
> > >
> > > if ( opt_pv_oob_pat && (l1f & PAGE_CACHE_ATTRS) > _PAGE_WP )
> > > {
> > > // #GP
> > > return -EINVAL;
> > > }
> > >
> > > fairly early on.
> > >
> > > It occurs to me that this check is only applicable when we're using the
> > > Xen ABI APT, and seeing as we've talked about possibly making patch 5 a
> > > Kconfig option, that may want accounting here. (Perhaps simply making
> > > opt_pb_oob_pat be false in a !XEN_PAT build.)
> >
> > It’s actually applicable even with other PATs. While Marek and I were
> > tracking down an Intel iGPU cache coherency problem, Marek used it to
> > verify that PAT entries that we thought were not being used were in fact
> > unused. This allowed proving that the behavior of the GPU was impacted
> > by changes to PAT entries the hardware should not even be looking at,
> > and therefore that the hardware itself must be buggy.
>
> In fact, I did that via WARN() on the Linux side, to _not_ have guest
> killed in this case, to potentially collect more info.
Whoops! I must have misunderstood what you meant by "trap".
> As said above,
> with alternative PAT settings, the contract which entries are "valid"
> isn't there anymore, so punishing guest for using them isn't
> appropriate. It could still be useful feature for debugging Linux (and
> it feels, we'll need this feature for some time...). So, with !XEN_PAT
> it should be at least disabled by default, or maybe even hidden behind
> CONFIG_DEBUG.
Okay, makes sense.
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
On 22.12.2022 23:31, Demi Marie Obenour wrote:
> Setting cacheability flags that are not ones specified by Xen is a bug
> in the guest. By default, return -EINVAL if a guests attempts to do
> this. The invalid-cacheability= Xen command-line flag allows the
> administrator to allow such attempts or to produce
Unfinished sentence?
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -1324,6 +1324,37 @@ static int put_page_from_l4e(l4_pgentry_t l4e, mfn_t l4mfn, unsigned int flags)
> return put_pt_page(l4e_get_page(l4e), mfn_to_page(l4mfn), flags);
> }
>
> +enum {
> + INVALID_CACHEABILITY_ALLOW,
> + INVALID_CACHEABILITY_DENY,
> + INVALID_CACHEABILITY_TRAP,
> +};
> +
> +#ifdef NDEBUG
> +#define INVALID_CACHEABILITY_DEFAULT INVALID_CACHEABILITY_DENY
> +#else
> +#define INVALID_CACHEABILITY_DEFAULT INVALID_CACHEABILITY_TRAP
> +#endif
> +
> +static __ro_after_init uint8_t invalid_cacheability =
> + INVALID_CACHEABILITY_DEFAULT;
> +
> +static int __init cf_check set_invalid_cacheability(const char *str)
> +{
> + if (strcmp("allow", str) == 0)
> + invalid_cacheability = INVALID_CACHEABILITY_ALLOW;
> + else if (strcmp("deny", str) == 0)
> + invalid_cacheability = INVALID_CACHEABILITY_DENY;
> + else if (strcmp("trap", str) == 0)
> + invalid_cacheability = INVALID_CACHEABILITY_TRAP;
Style: Missing blanks immediately inside if(). Also note that generally
we prefer '!' over "== 0".
> + else
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +custom_param("invalid-cacheability", set_invalid_cacheability);
Nit: Generally we avoid blank lines between the handler of a
custom_param() and the actual param definition.
> @@ -1343,7 +1374,34 @@ static int promote_l1_table(struct page_info *page)
> }
> else
> {
> - switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
> + l1_pgentry_t l1e = pl1e[i];
> +
> + if ( invalid_cacheability != INVALID_CACHEABILITY_ALLOW )
> + {
> + switch ( l1e.l1 & PAGE_CACHE_ATTRS )
You want to use l1e_get_flags() here.
Jan
© 2016 - 2026 Red Hat, Inc.