[v2] Enable automatic SVN updates for SGX enclaves

[PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Elena Reshetova 10 months, 2 weeks ago

SGX architecture introduced a new instruction called EUPDATESVN
to Ice Lake. It allows updating security SVN version, given that EPC
is completely empty. The latter is required for security reasons
in order to reason that enclave security posture is as secure as the
security SVN version of the TCB that created it.

Additionally it is important to ensure that while ENCLS[EUPDATESVN]
runs, no concurrent page creation happens in EPC, because it might
result in #GP delivered to the creator. Legacy SW might not be prepared
to handle such unexpected #GPs and therefore this patch introduces
a locking mechanism to ensure no concurrent EPC allocations can happen.

It is also ensured that ENCLS[EUPDATESVN] is not called when running
in a VM since it does not have a meaning in this context (microcode
updates application is limited to the host OS) and will create
unnecessary load.

This patch is based on previous submision by Cathy Zhang
https://lore.kernel.org/all/20220520103904.1216-1-cathy.zhang@intel.com/

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
---
 arch/x86/include/asm/sgx.h      | 41 +++++++++++++--------
 arch/x86/kernel/cpu/sgx/encls.h |  6 ++++
 arch/x86/kernel/cpu/sgx/main.c  | 63 ++++++++++++++++++++++++++++++++-
 arch/x86/kernel/cpu/sgx/sgx.h   |  1 +
 4 files changed, 95 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 6a0069761508..5caf5c31ebc6 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -26,23 +26,26 @@
 #define SGX_CPUID_EPC_SECTION	0x1
 /* The bitmask for the EPC section type. */
 #define SGX_CPUID_EPC_MASK	GENMASK(3, 0)
+/* EUPDATESVN presence indication */
+#define SGX_CPUID_EUPDATESVN	BIT(10)
 
 enum sgx_encls_function {
-	ECREATE	= 0x00,
-	EADD	= 0x01,
-	EINIT	= 0x02,
-	EREMOVE	= 0x03,
-	EDGBRD	= 0x04,
-	EDGBWR	= 0x05,
-	EEXTEND	= 0x06,
-	ELDU	= 0x08,
-	EBLOCK	= 0x09,
-	EPA	= 0x0A,
-	EWB	= 0x0B,
-	ETRACK	= 0x0C,
-	EAUG	= 0x0D,
-	EMODPR	= 0x0E,
-	EMODT	= 0x0F,
+	ECREATE		= 0x00,
+	EADD		= 0x01,
+	EINIT		= 0x02,
+	EREMOVE		= 0x03,
+	EDGBRD		= 0x04,
+	EDGBWR		= 0x05,
+	EEXTEND		= 0x06,
+	ELDU		= 0x08,
+	EBLOCK		= 0x09,
+	EPA		= 0x0A,
+	EWB		= 0x0B,
+	ETRACK		= 0x0C,
+	EAUG		= 0x0D,
+	EMODPR		= 0x0E,
+	EMODT		= 0x0F,
+	EUPDATESVN	= 0x18,
 };
 
 /**
@@ -73,6 +76,11 @@ enum sgx_encls_function {
  *				public key does not match IA32_SGXLEPUBKEYHASH.
  * %SGX_PAGE_NOT_MODIFIABLE:	The EPC page cannot be modified because it
  *				is in the PENDING or MODIFIED state.
+ * %SGX_INSUFFICIENT_ENTROPY:	Insufficient entropy in RNG.
+ * %SGX_EPC_NOT_READY:		EPC is not ready for SVN update.
+ * %SGX_NO_UPDATE:		EUPDATESVN was successful, but CPUSVN was not
+ *				updated because current SVN was not newer than
+ *				CPUSVN.
  * %SGX_UNMASKED_EVENT:		An unmasked event, e.g. INTR, was received
  */
 enum sgx_return_code {
@@ -81,6 +89,9 @@ enum sgx_return_code {
 	SGX_CHILD_PRESENT		= 13,
 	SGX_INVALID_EINITTOKEN		= 16,
 	SGX_PAGE_NOT_MODIFIABLE		= 20,
+	SGX_INSUFFICIENT_ENTROPY	= 29,
+	SGX_EPC_NOT_READY		= 30,
+	SGX_NO_UPDATE			= 31,
 	SGX_UNMASKED_EVENT		= 128,
 };
 
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 99004b02e2ed..3d83c76dc91f 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -233,4 +233,10 @@ static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr)
 	return __encls_2(EAUG, pginfo, addr);
 }
 
+/* Update CPUSVN at runtime. */
+static inline int __eupdatesvn(void)
+{
+	return __encls_ret_1(EUPDATESVN, "");
+}
+
 #endif /* _X86_ENCLS_H */
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index b61d3bad0446..24563110811d 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -32,6 +32,11 @@ static DEFINE_XARRAY(sgx_epc_address_space);
 static LIST_HEAD(sgx_active_page_list);
 static DEFINE_SPINLOCK(sgx_reclaimer_lock);
 
+/* This lock is held to prevent new EPC pages from being created
+ * during the execution of ENCLS[EUPDATESVN].
+ */
+static DEFINE_SPINLOCK(sgx_epc_eupdatesvn_lock);
+
 static atomic_long_t sgx_nr_used_pages = ATOMIC_LONG_INIT(0);
 static unsigned long sgx_nr_total_pages;
 
@@ -457,7 +462,17 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
 	page->flags = 0;
 
 	spin_unlock(&node->lock);
-	atomic_long_inc(&sgx_nr_used_pages);
+
+	if (!atomic_long_inc_not_zero(&sgx_nr_used_pages)) {
+		spin_lock(&sgx_epc_eupdatesvn_lock);
+		/* Only call sgx_updatesvn() once the first enclave's
+		 * page is allocated from EPC
+		 */
+		if (atomic_long_read(&sgx_nr_used_pages) == 0)
+			sgx_updatesvn();
+		atomic_long_inc(&sgx_nr_used_pages);
+		spin_unlock(&sgx_epc_eupdatesvn_lock);
+	}
 
 	return page;
 }
@@ -970,3 +985,49 @@ static int __init sgx_init(void)
 }
 
 device_initcall(sgx_init);
+
+/**
+ * sgx_updatesvn() - Issue ENCLS[EUPDATESVN]
+ * If EPC is ready, this instruction will update CPUSVN to the currently
+ * loaded microcode update SVN and generate new cryptographic assets.
+ */
+void sgx_updatesvn(void)
+{
+	int retry = 10;
+	int ret;
+
+	lockdep_assert_held(&sgx_epc_eupdatesvn_lock);
+
+	if (!(cpuid_eax(SGX_CPUID) & SGX_CPUID_EUPDATESVN))
+		return;
+
+	/* Do not execute ENCLS[EUPDATESVN] if running in a VM since
+	 * microcode updates are only meaningful to be applied on the host.
+	 */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		return;
+
+	do {
+		ret = __eupdatesvn();
+		if (ret != SGX_INSUFFICIENT_ENTROPY)
+			break;
+
+	} while (--retry);
+
+	switch (ret) {
+	case 0:
+		pr_info("EUPDATESVN: success\n");
+		break;
+	case SGX_EPC_NOT_READY:
+	case SGX_INSUFFICIENT_ENTROPY:
+	case SGX_EPC_PAGE_CONFLICT:
+		pr_err("EUPDATESVN: error %d\n", ret);
+		break;
+	case SGX_NO_UPDATE:
+		break;
+	default:
+		pr_err("EUPDATESVN: unknown error %d\n", ret);
+		break;
+	}
+
+}
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index d2dad21259a8..12ae49e78959 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -103,5 +103,6 @@ static inline int __init sgx_vepc_init(void)
 #endif
 
 void sgx_update_lepubkeyhash(u64 *lepubkeyhash);
+void sgx_updatesvn(void);
 
 #endif /* _X86_SGX_H */
-- 
2.45.2

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 2 weeks ago

On Fri, Mar 28, 2025 at 02:57:41PM +0200, Elena Reshetova wrote:
> SGX architecture introduced a new instruction called EUPDATESVN
> to Ice Lake. It allows updating security SVN version, given that EPC
> is completely empty. The latter is required for security reasons
> in order to reason that enclave security posture is as secure as the
> security SVN version of the TCB that created it.
> 
> Additionally it is important to ensure that while ENCLS[EUPDATESVN]
> runs, no concurrent page creation happens in EPC, because it might
> result in #GP delivered to the creator. Legacy SW might not be prepared
> to handle such unexpected #GPs and therefore this patch introduces
> a locking mechanism to ensure no concurrent EPC allocations can happen.
> 
> It is also ensured that ENCLS[EUPDATESVN] is not called when running
> in a VM since it does not have a meaning in this context (microcode
> updates application is limited to the host OS) and will create
> unnecessary load.
> 
> This patch is based on previous submision by Cathy Zhang
> https://lore.kernel.org/all/20220520103904.1216-1-cathy.zhang@intel.com/
> 
> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> ---
>  arch/x86/include/asm/sgx.h      | 41 +++++++++++++--------
>  arch/x86/kernel/cpu/sgx/encls.h |  6 ++++
>  arch/x86/kernel/cpu/sgx/main.c  | 63 ++++++++++++++++++++++++++++++++-
>  arch/x86/kernel/cpu/sgx/sgx.h   |  1 +
>  4 files changed, 95 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> index 6a0069761508..5caf5c31ebc6 100644
> --- a/arch/x86/include/asm/sgx.h
> +++ b/arch/x86/include/asm/sgx.h
> @@ -26,23 +26,26 @@
>  #define SGX_CPUID_EPC_SECTION	0x1
>  /* The bitmask for the EPC section type. */
>  #define SGX_CPUID_EPC_MASK	GENMASK(3, 0)
> +/* EUPDATESVN presence indication */
> +#define SGX_CPUID_EUPDATESVN	BIT(10)
>  
>  enum sgx_encls_function {
> -	ECREATE	= 0x00,
> -	EADD	= 0x01,
> -	EINIT	= 0x02,
> -	EREMOVE	= 0x03,
> -	EDGBRD	= 0x04,
> -	EDGBWR	= 0x05,
> -	EEXTEND	= 0x06,
> -	ELDU	= 0x08,
> -	EBLOCK	= 0x09,
> -	EPA	= 0x0A,
> -	EWB	= 0x0B,
> -	ETRACK	= 0x0C,
> -	EAUG	= 0x0D,
> -	EMODPR	= 0x0E,
> -	EMODT	= 0x0F,
> +	ECREATE		= 0x00,
> +	EADD		= 0x01,
> +	EINIT		= 0x02,
> +	EREMOVE		= 0x03,
> +	EDGBRD		= 0x04,
> +	EDGBWR		= 0x05,
> +	EEXTEND		= 0x06,
> +	ELDU		= 0x08,
> +	EBLOCK		= 0x09,
> +	EPA		= 0x0A,
> +	EWB		= 0x0B,
> +	ETRACK		= 0x0C,
> +	EAUG		= 0x0D,
> +	EMODPR		= 0x0E,
> +	EMODT		= 0x0F,
> +	EUPDATESVN	= 0x18,
>  };
>  
>  /**
> @@ -73,6 +76,11 @@ enum sgx_encls_function {
>   *				public key does not match IA32_SGXLEPUBKEYHASH.
>   * %SGX_PAGE_NOT_MODIFIABLE:	The EPC page cannot be modified because it
>   *				is in the PENDING or MODIFIED state.
> + * %SGX_INSUFFICIENT_ENTROPY:	Insufficient entropy in RNG.
> + * %SGX_EPC_NOT_READY:		EPC is not ready for SVN update.
> + * %SGX_NO_UPDATE:		EUPDATESVN was successful, but CPUSVN was not
> + *				updated because current SVN was not newer than
> + *				CPUSVN.
>   * %SGX_UNMASKED_EVENT:		An unmasked event, e.g. INTR, was received
>   */
>  enum sgx_return_code {
> @@ -81,6 +89,9 @@ enum sgx_return_code {
>  	SGX_CHILD_PRESENT		= 13,
>  	SGX_INVALID_EINITTOKEN		= 16,
>  	SGX_PAGE_NOT_MODIFIABLE		= 20,
> +	SGX_INSUFFICIENT_ENTROPY	= 29,
> +	SGX_EPC_NOT_READY		= 30,
> +	SGX_NO_UPDATE			= 31,
>  	SGX_UNMASKED_EVENT		= 128,
>  };
>  
> diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
> index 99004b02e2ed..3d83c76dc91f 100644
> --- a/arch/x86/kernel/cpu/sgx/encls.h
> +++ b/arch/x86/kernel/cpu/sgx/encls.h
> @@ -233,4 +233,10 @@ static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr)
>  	return __encls_2(EAUG, pginfo, addr);
>  }
>  
> +/* Update CPUSVN at runtime. */
> +static inline int __eupdatesvn(void)
> +{
> +	return __encls_ret_1(EUPDATESVN, "");
> +}
> +
>  #endif /* _X86_ENCLS_H */
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index b61d3bad0446..24563110811d 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -32,6 +32,11 @@ static DEFINE_XARRAY(sgx_epc_address_space);
>  static LIST_HEAD(sgx_active_page_list);
>  static DEFINE_SPINLOCK(sgx_reclaimer_lock);
>  
> +/* This lock is held to prevent new EPC pages from being created
> + * during the execution of ENCLS[EUPDATESVN].
> + */
> +static DEFINE_SPINLOCK(sgx_epc_eupdatesvn_lock);
> +
>  static atomic_long_t sgx_nr_used_pages = ATOMIC_LONG_INIT(0);
>  static unsigned long sgx_nr_total_pages;
>  
> @@ -457,7 +462,17 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
>  	page->flags = 0;
>  
>  	spin_unlock(&node->lock);
> -	atomic_long_inc(&sgx_nr_used_pages);
> +
> +	if (!atomic_long_inc_not_zero(&sgx_nr_used_pages)) {
> +		spin_lock(&sgx_epc_eupdatesvn_lock);
> +		/* Only call sgx_updatesvn() once the first enclave's
> +		 * page is allocated from EPC
> +		 */
> +		if (atomic_long_read(&sgx_nr_used_pages) == 0)
> +			sgx_updatesvn();
> +		atomic_long_inc(&sgx_nr_used_pages);
> +		spin_unlock(&sgx_epc_eupdatesvn_lock);
> +	}
>  
>  	return page;
>  }
> @@ -970,3 +985,49 @@ static int __init sgx_init(void)
>  }
>  
>  device_initcall(sgx_init);
> +
> +/**
> + * sgx_updatesvn() - Issue ENCLS[EUPDATESVN]
> + * If EPC is ready, this instruction will update CPUSVN to the currently
> + * loaded microcode update SVN and generate new cryptographic assets.
> + */
> +void sgx_updatesvn(void)
> +{
> +	int retry = 10;
> +	int ret;
> +
> +	lockdep_assert_held(&sgx_epc_eupdatesvn_lock);
> +
> +	if (!(cpuid_eax(SGX_CPUID) & SGX_CPUID_EUPDATESVN))
> +		return;
> +
> +	/* Do not execute ENCLS[EUPDATESVN] if running in a VM since
> +	 * microcode updates are only meaningful to be applied on the host.
> +	 */
> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> +		return;
> +
> +	do {
> +		ret = __eupdatesvn();
> +		if (ret != SGX_INSUFFICIENT_ENTROPY)
> +			break;
> +
> +	} while (--retry);
> +
> +	switch (ret) {
> +	case 0:
> +		pr_info("EUPDATESVN: success\n");
> +		break;
> +	case SGX_EPC_NOT_READY:
> +	case SGX_INSUFFICIENT_ENTROPY:
> +	case SGX_EPC_PAGE_CONFLICT:
> +		pr_err("EUPDATESVN: error %d\n", ret);
> +		break;
> +	case SGX_NO_UPDATE:
> +		break;
> +	default:
> +		pr_err("EUPDATESVN: unknown error %d\n", ret);
> +		break;
> +	}

Overall, I think you're right in that "inversion" does make sense,
now that other stuff is better aligned.

At least when there is spurious error, I think ioctl's should stop
responding and driver should not do anything useful anymore. I.e.,
it should go out-of-service.

I don't think the driver should tear-down, just stop servicing
VM's and responding ioctl's.

Possibly thish should be also right action for other errors than
"insufficient entropy" but I'm open for comments for this.

BR, Jarkko

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 2 weeks ago

On Fri, Mar 28, 2025 at 07:50:43PM +0200, Jarkko Sakkinen wrote:
> On Fri, Mar 28, 2025 at 02:57:41PM +0200, Elena Reshetova wrote:
> > SGX architecture introduced a new instruction called EUPDATESVN
> > to Ice Lake. It allows updating security SVN version, given that EPC
> > is completely empty. The latter is required for security reasons
> > in order to reason that enclave security posture is as secure as the
> > security SVN version of the TCB that created it.
> > 
> > Additionally it is important to ensure that while ENCLS[EUPDATESVN]
> > runs, no concurrent page creation happens in EPC, because it might
> > result in #GP delivered to the creator. Legacy SW might not be prepared
> > to handle such unexpected #GPs and therefore this patch introduces
> > a locking mechanism to ensure no concurrent EPC allocations can happen.
> > 
> > It is also ensured that ENCLS[EUPDATESVN] is not called when running
> > in a VM since it does not have a meaning in this context (microcode
> > updates application is limited to the host OS) and will create
> > unnecessary load.
> > 
> > This patch is based on previous submision by Cathy Zhang
> > https://lore.kernel.org/all/20220520103904.1216-1-cathy.zhang@intel.com/
> > 
> > Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> > ---
> >  arch/x86/include/asm/sgx.h      | 41 +++++++++++++--------
> >  arch/x86/kernel/cpu/sgx/encls.h |  6 ++++
> >  arch/x86/kernel/cpu/sgx/main.c  | 63 ++++++++++++++++++++++++++++++++-
> >  arch/x86/kernel/cpu/sgx/sgx.h   |  1 +
> >  4 files changed, 95 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> > index 6a0069761508..5caf5c31ebc6 100644
> > --- a/arch/x86/include/asm/sgx.h
> > +++ b/arch/x86/include/asm/sgx.h
> > @@ -26,23 +26,26 @@
> >  #define SGX_CPUID_EPC_SECTION	0x1
> >  /* The bitmask for the EPC section type. */
> >  #define SGX_CPUID_EPC_MASK	GENMASK(3, 0)
> > +/* EUPDATESVN presence indication */
> > +#define SGX_CPUID_EUPDATESVN	BIT(10)
> >  
> >  enum sgx_encls_function {
> > -	ECREATE	= 0x00,
> > -	EADD	= 0x01,
> > -	EINIT	= 0x02,
> > -	EREMOVE	= 0x03,
> > -	EDGBRD	= 0x04,
> > -	EDGBWR	= 0x05,
> > -	EEXTEND	= 0x06,
> > -	ELDU	= 0x08,
> > -	EBLOCK	= 0x09,
> > -	EPA	= 0x0A,
> > -	EWB	= 0x0B,
> > -	ETRACK	= 0x0C,
> > -	EAUG	= 0x0D,
> > -	EMODPR	= 0x0E,
> > -	EMODT	= 0x0F,
> > +	ECREATE		= 0x00,
> > +	EADD		= 0x01,
> > +	EINIT		= 0x02,
> > +	EREMOVE		= 0x03,
> > +	EDGBRD		= 0x04,
> > +	EDGBWR		= 0x05,
> > +	EEXTEND		= 0x06,
> > +	ELDU		= 0x08,
> > +	EBLOCK		= 0x09,
> > +	EPA		= 0x0A,
> > +	EWB		= 0x0B,
> > +	ETRACK		= 0x0C,
> > +	EAUG		= 0x0D,
> > +	EMODPR		= 0x0E,
> > +	EMODT		= 0x0F,
> > +	EUPDATESVN	= 0x18,
> >  };
> >  
> >  /**
> > @@ -73,6 +76,11 @@ enum sgx_encls_function {
> >   *				public key does not match IA32_SGXLEPUBKEYHASH.
> >   * %SGX_PAGE_NOT_MODIFIABLE:	The EPC page cannot be modified because it
> >   *				is in the PENDING or MODIFIED state.
> > + * %SGX_INSUFFICIENT_ENTROPY:	Insufficient entropy in RNG.
> > + * %SGX_EPC_NOT_READY:		EPC is not ready for SVN update.
> > + * %SGX_NO_UPDATE:		EUPDATESVN was successful, but CPUSVN was not
> > + *				updated because current SVN was not newer than
> > + *				CPUSVN.
> >   * %SGX_UNMASKED_EVENT:		An unmasked event, e.g. INTR, was received
> >   */
> >  enum sgx_return_code {
> > @@ -81,6 +89,9 @@ enum sgx_return_code {
> >  	SGX_CHILD_PRESENT		= 13,
> >  	SGX_INVALID_EINITTOKEN		= 16,
> >  	SGX_PAGE_NOT_MODIFIABLE		= 20,
> > +	SGX_INSUFFICIENT_ENTROPY	= 29,
> > +	SGX_EPC_NOT_READY		= 30,
> > +	SGX_NO_UPDATE			= 31,
> >  	SGX_UNMASKED_EVENT		= 128,
> >  };
> >  
> > diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
> > index 99004b02e2ed..3d83c76dc91f 100644
> > --- a/arch/x86/kernel/cpu/sgx/encls.h
> > +++ b/arch/x86/kernel/cpu/sgx/encls.h
> > @@ -233,4 +233,10 @@ static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr)
> >  	return __encls_2(EAUG, pginfo, addr);
> >  }
> >  
> > +/* Update CPUSVN at runtime. */
> > +static inline int __eupdatesvn(void)
> > +{
> > +	return __encls_ret_1(EUPDATESVN, "");
> > +}
> > +
> >  #endif /* _X86_ENCLS_H */
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> > index b61d3bad0446..24563110811d 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -32,6 +32,11 @@ static DEFINE_XARRAY(sgx_epc_address_space);
> >  static LIST_HEAD(sgx_active_page_list);
> >  static DEFINE_SPINLOCK(sgx_reclaimer_lock);
> >  
> > +/* This lock is held to prevent new EPC pages from being created
> > + * during the execution of ENCLS[EUPDATESVN].
> > + */
> > +static DEFINE_SPINLOCK(sgx_epc_eupdatesvn_lock);
> > +
> >  static atomic_long_t sgx_nr_used_pages = ATOMIC_LONG_INIT(0);
> >  static unsigned long sgx_nr_total_pages;
> >  
> > @@ -457,7 +462,17 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> >  	page->flags = 0;
> >  
> >  	spin_unlock(&node->lock);
> > -	atomic_long_inc(&sgx_nr_used_pages);
> > +
> > +	if (!atomic_long_inc_not_zero(&sgx_nr_used_pages)) {
> > +		spin_lock(&sgx_epc_eupdatesvn_lock);
> > +		/* Only call sgx_updatesvn() once the first enclave's
> > +		 * page is allocated from EPC
> > +		 */
> > +		if (atomic_long_read(&sgx_nr_used_pages) == 0)
> > +			sgx_updatesvn();
> > +		atomic_long_inc(&sgx_nr_used_pages);
> > +		spin_unlock(&sgx_epc_eupdatesvn_lock);
> > +	}
> >  
> >  	return page;
> >  }
> > @@ -970,3 +985,49 @@ static int __init sgx_init(void)
> >  }
> >  
> >  device_initcall(sgx_init);
> > +
> > +/**
> > + * sgx_updatesvn() - Issue ENCLS[EUPDATESVN]
> > + * If EPC is ready, this instruction will update CPUSVN to the currently
> > + * loaded microcode update SVN and generate new cryptographic assets.
> > + */
> > +void sgx_updatesvn(void)
> > +{
> > +	int retry = 10;
> > +	int ret;
> > +
> > +	lockdep_assert_held(&sgx_epc_eupdatesvn_lock);
> > +
> > +	if (!(cpuid_eax(SGX_CPUID) & SGX_CPUID_EUPDATESVN))
> > +		return;
> > +
> > +	/* Do not execute ENCLS[EUPDATESVN] if running in a VM since
> > +	 * microcode updates are only meaningful to be applied on the host.
> > +	 */
> > +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> > +		return;
> > +
> > +	do {
> > +		ret = __eupdatesvn();
> > +		if (ret != SGX_INSUFFICIENT_ENTROPY)
> > +			break;
> > +
> > +	} while (--retry);
> > +
> > +	switch (ret) {
> > +	case 0:
> > +		pr_info("EUPDATESVN: success\n");
> > +		break;
> > +	case SGX_EPC_NOT_READY:
> > +	case SGX_INSUFFICIENT_ENTROPY:
> > +	case SGX_EPC_PAGE_CONFLICT:
> > +		pr_err("EUPDATESVN: error %d\n", ret);
> > +		break;
> > +	case SGX_NO_UPDATE:
> > +		break;
> > +	default:
> > +		pr_err("EUPDATESVN: unknown error %d\n", ret);
> > +		break;
> > +	}
> 
> Overall, I think you're right in that "inversion" does make sense,
> now that other stuff is better aligned.
> 
> At least when there is spurious error, I think ioctl's should stop
> responding and driver should not do anything useful anymore. I.e.,
> it should go out-of-service.
> 
> I don't think the driver should tear-down, just stop servicing
> VM's and responding ioctl's.
> 
> Possibly thish should be also right action for other errors than
> "insufficient entropy" but I'm open for comments for this.

Or actually actually I take one step back with my suggestions
because this really should be a question for which I don't have
the definitive answer.

The current code works like this: if anything that we don't
like happens, we re-iterate.

Should some of the "exceptional conditions" have a different
recovery or not?

BR, Jarkko

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months, 2 weeks ago

> 
> On Fri, Mar 28, 2025 at 07:50:43PM +0200, Jarkko Sakkinen wrote:
> > On Fri, Mar 28, 2025 at 02:57:41PM +0200, Elena Reshetova wrote:
> > > SGX architecture introduced a new instruction called EUPDATESVN
> > > to Ice Lake. It allows updating security SVN version, given that EPC
> > > is completely empty. The latter is required for security reasons
> > > in order to reason that enclave security posture is as secure as the
> > > security SVN version of the TCB that created it.
> > >
> > > Additionally it is important to ensure that while ENCLS[EUPDATESVN]
> > > runs, no concurrent page creation happens in EPC, because it might
> > > result in #GP delivered to the creator. Legacy SW might not be prepared
> > > to handle such unexpected #GPs and therefore this patch introduces
> > > a locking mechanism to ensure no concurrent EPC allocations can happen.
> > >
> > > It is also ensured that ENCLS[EUPDATESVN] is not called when running
> > > in a VM since it does not have a meaning in this context (microcode
> > > updates application is limited to the host OS) and will create
> > > unnecessary load.
> > >
> > > This patch is based on previous submision by Cathy Zhang
> > > https://lore.kernel.org/all/20220520103904.1216-1-
> cathy.zhang@intel.com/
> > >
> > > Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> > > ---
> > >  arch/x86/include/asm/sgx.h      | 41 +++++++++++++--------
> > >  arch/x86/kernel/cpu/sgx/encls.h |  6 ++++
> > >  arch/x86/kernel/cpu/sgx/main.c  | 63
> ++++++++++++++++++++++++++++++++-
> > >  arch/x86/kernel/cpu/sgx/sgx.h   |  1 +
> > >  4 files changed, 95 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> > > index 6a0069761508..5caf5c31ebc6 100644
> > > --- a/arch/x86/include/asm/sgx.h
> > > +++ b/arch/x86/include/asm/sgx.h
> > > @@ -26,23 +26,26 @@
> > >  #define SGX_CPUID_EPC_SECTION	0x1
> > >  /* The bitmask for the EPC section type. */
> > >  #define SGX_CPUID_EPC_MASK	GENMASK(3, 0)
> > > +/* EUPDATESVN presence indication */
> > > +#define SGX_CPUID_EUPDATESVN	BIT(10)
> > >
> > >  enum sgx_encls_function {
> > > -	ECREATE	= 0x00,
> > > -	EADD	= 0x01,
> > > -	EINIT	= 0x02,
> > > -	EREMOVE	= 0x03,
> > > -	EDGBRD	= 0x04,
> > > -	EDGBWR	= 0x05,
> > > -	EEXTEND	= 0x06,
> > > -	ELDU	= 0x08,
> > > -	EBLOCK	= 0x09,
> > > -	EPA	= 0x0A,
> > > -	EWB	= 0x0B,
> > > -	ETRACK	= 0x0C,
> > > -	EAUG	= 0x0D,
> > > -	EMODPR	= 0x0E,
> > > -	EMODT	= 0x0F,
> > > +	ECREATE		= 0x00,
> > > +	EADD		= 0x01,
> > > +	EINIT		= 0x02,
> > > +	EREMOVE		= 0x03,
> > > +	EDGBRD		= 0x04,
> > > +	EDGBWR		= 0x05,
> > > +	EEXTEND		= 0x06,
> > > +	ELDU		= 0x08,
> > > +	EBLOCK		= 0x09,
> > > +	EPA		= 0x0A,
> > > +	EWB		= 0x0B,
> > > +	ETRACK		= 0x0C,
> > > +	EAUG		= 0x0D,
> > > +	EMODPR		= 0x0E,
> > > +	EMODT		= 0x0F,
> > > +	EUPDATESVN	= 0x18,
> > >  };
> > >
> > >  /**
> > > @@ -73,6 +76,11 @@ enum sgx_encls_function {
> > >   *				public key does not match
> IA32_SGXLEPUBKEYHASH.
> > >   * %SGX_PAGE_NOT_MODIFIABLE:	The EPC page cannot be modified
> because it
> > >   *				is in the PENDING or MODIFIED state.
> > > + * %SGX_INSUFFICIENT_ENTROPY:	Insufficient entropy in RNG.
> > > + * %SGX_EPC_NOT_READY:		EPC is not ready for SVN update.
> > > + * %SGX_NO_UPDATE:		EUPDATESVN was successful, but
> CPUSVN was not
> > > + *				updated because current SVN was not newer
> than
> > > + *				CPUSVN.
> > >   * %SGX_UNMASKED_EVENT:		An unmasked event, e.g.
> INTR, was received
> > >   */
> > >  enum sgx_return_code {
> > > @@ -81,6 +89,9 @@ enum sgx_return_code {
> > >  	SGX_CHILD_PRESENT		= 13,
> > >  	SGX_INVALID_EINITTOKEN		= 16,
> > >  	SGX_PAGE_NOT_MODIFIABLE		= 20,
> > > +	SGX_INSUFFICIENT_ENTROPY	= 29,
> > > +	SGX_EPC_NOT_READY		= 30,
> > > +	SGX_NO_UPDATE			= 31,
> > >  	SGX_UNMASKED_EVENT		= 128,
> > >  };
> > >
> > > diff --git a/arch/x86/kernel/cpu/sgx/encls.h
> b/arch/x86/kernel/cpu/sgx/encls.h
> > > index 99004b02e2ed..3d83c76dc91f 100644
> > > --- a/arch/x86/kernel/cpu/sgx/encls.h
> > > +++ b/arch/x86/kernel/cpu/sgx/encls.h
> > > @@ -233,4 +233,10 @@ static inline int __eaug(struct sgx_pageinfo
> *pginfo, void *addr)
> > >  	return __encls_2(EAUG, pginfo, addr);
> > >  }
> > >
> > > +/* Update CPUSVN at runtime. */
> > > +static inline int __eupdatesvn(void)
> > > +{
> > > +	return __encls_ret_1(EUPDATESVN, "");
> > > +}
> > > +
> > >  #endif /* _X86_ENCLS_H */
> > > diff --git a/arch/x86/kernel/cpu/sgx/main.c
> b/arch/x86/kernel/cpu/sgx/main.c
> > > index b61d3bad0446..24563110811d 100644
> > > --- a/arch/x86/kernel/cpu/sgx/main.c
> > > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > > @@ -32,6 +32,11 @@ static DEFINE_XARRAY(sgx_epc_address_space);
> > >  static LIST_HEAD(sgx_active_page_list);
> > >  static DEFINE_SPINLOCK(sgx_reclaimer_lock);
> > >
> > > +/* This lock is held to prevent new EPC pages from being created
> > > + * during the execution of ENCLS[EUPDATESVN].
> > > + */
> > > +static DEFINE_SPINLOCK(sgx_epc_eupdatesvn_lock);
> > > +
> > >  static atomic_long_t sgx_nr_used_pages = ATOMIC_LONG_INIT(0);
> > >  static unsigned long sgx_nr_total_pages;
> > >
> > > @@ -457,7 +462,17 @@ static struct sgx_epc_page
> *__sgx_alloc_epc_page_from_node(int nid)
> > >  	page->flags = 0;
> > >
> > >  	spin_unlock(&node->lock);
> > > -	atomic_long_inc(&sgx_nr_used_pages);
> > > +
> > > +	if (!atomic_long_inc_not_zero(&sgx_nr_used_pages)) {
> > > +		spin_lock(&sgx_epc_eupdatesvn_lock);
> > > +		/* Only call sgx_updatesvn() once the first enclave's
> > > +		 * page is allocated from EPC
> > > +		 */
> > > +		if (atomic_long_read(&sgx_nr_used_pages) == 0)
> > > +			sgx_updatesvn();
> > > +		atomic_long_inc(&sgx_nr_used_pages);
> > > +		spin_unlock(&sgx_epc_eupdatesvn_lock);
> > > +	}
> > >
> > >  	return page;
> > >  }
> > > @@ -970,3 +985,49 @@ static int __init sgx_init(void)
> > >  }
> > >
> > >  device_initcall(sgx_init);
> > > +
> > > +/**
> > > + * sgx_updatesvn() - Issue ENCLS[EUPDATESVN]
> > > + * If EPC is ready, this instruction will update CPUSVN to the currently
> > > + * loaded microcode update SVN and generate new cryptographic assets.
> > > + */
> > > +void sgx_updatesvn(void)
> > > +{
> > > +	int retry = 10;
> > > +	int ret;
> > > +
> > > +	lockdep_assert_held(&sgx_epc_eupdatesvn_lock);
> > > +
> > > +	if (!(cpuid_eax(SGX_CPUID) & SGX_CPUID_EUPDATESVN))
> > > +		return;
> > > +
> > > +	/* Do not execute ENCLS[EUPDATESVN] if running in a VM since
> > > +	 * microcode updates are only meaningful to be applied on the host.
> > > +	 */
> > > +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> > > +		return;
> > > +
> > > +	do {
> > > +		ret = __eupdatesvn();
> > > +		if (ret != SGX_INSUFFICIENT_ENTROPY)
> > > +			break;
> > > +
> > > +	} while (--retry);
> > > +
> > > +	switch (ret) {
> > > +	case 0:
> > > +		pr_info("EUPDATESVN: success\n");
> > > +		break;
> > > +	case SGX_EPC_NOT_READY:
> > > +	case SGX_INSUFFICIENT_ENTROPY:
> > > +	case SGX_EPC_PAGE_CONFLICT:
> > > +		pr_err("EUPDATESVN: error %d\n", ret);
> > > +		break;
> > > +	case SGX_NO_UPDATE:
> > > +		break;
> > > +	default:
> > > +		pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > +		break;
> > > +	}
> >
> > Overall, I think you're right in that "inversion" does make sense,
> > now that other stuff is better aligned.
> >
> > At least when there is spurious error, I think ioctl's should stop
> > responding and driver should not do anything useful anymore. I.e.,
> > it should go out-of-service.
> >
> > I don't think the driver should tear-down, just stop servicing
> > VM's and responding ioctl's.
> >
> > Possibly thish should be also right action for other errors than
> > "insufficient entropy" but I'm open for comments for this.
> 
> Or actually actually I take one step back with my suggestions
> because this really should be a question for which I don't have
> the definitive answer.
> 
> The current code works like this: if anything that we don't
> like happens, we re-iterate.
> 
> Should some of the "exceptional conditions" have a different
> recovery or not?

None of these exceptional conditions are fatal or present an
immediate danger to the system security. So, allowing the re-tries
seems logical in this case. In case re-tries also fail, the system
admin will have an option of gracefully shutting down all enclaves
and doing either a full reboot (if SVN is the only concern) or other
necessary actions like taking the physical node out of use, etc. 

Does this sound reasonable? 

Best Regards,
Elena.

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 2 weeks ago

On Mon, Mar 31, 2025 at 07:26:45AM +0000, Reshetova, Elena wrote:
> > > > +	default:
> > > > +		pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > +		break;
> > > > +	}
> > >
> > > Overall, I think you're right in that "inversion" does make sense,
> > > now that other stuff is better aligned.
> > >
> > > At least when there is spurious error, I think ioctl's should stop
> > > responding and driver should not do anything useful anymore. I.e.,
> > > it should go out-of-service.
> > >
> > > I don't think the driver should tear-down, just stop servicing
> > > VM's and responding ioctl's.
> > >
> > > Possibly thish should be also right action for other errors than
> > > "insufficient entropy" but I'm open for comments for this.
> > 
> > Or actually actually I take one step back with my suggestions
> > because this really should be a question for which I don't have
> > the definitive answer.
> > 
> > The current code works like this: if anything that we don't
> > like happens, we re-iterate.
> > 
> > Should some of the "exceptional conditions" have a different
> > recovery or not?
> 
> None of these exceptional conditions are fatal or present an
> immediate danger to the system security. So, allowing the re-tries
> seems logical in this case. In case re-tries also fail, the system
> admin will have an option of gracefully shutting down all enclaves
> and doing either a full reboot (if SVN is the only concern) or other
> necessary actions like taking the physical node out of use, etc. 
> 
> Does this sound reasonable? 

Uknown error I don't think would hold that premise.

> 
> Best Regards,
> Elena.
> 

BR, Jarkko
>

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months, 2 weeks ago

> On Mon, Mar 31, 2025 at 07:26:45AM +0000, Reshetova, Elena wrote:
> > > > > +	default:
> > > > > +		pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > > +		break;
> > > > > +	}
> > > >
> > > > Overall, I think you're right in that "inversion" does make sense,
> > > > now that other stuff is better aligned.
> > > >
> > > > At least when there is spurious error, I think ioctl's should stop
> > > > responding and driver should not do anything useful anymore. I.e.,
> > > > it should go out-of-service.
> > > >
> > > > I don't think the driver should tear-down, just stop servicing
> > > > VM's and responding ioctl's.
> > > >
> > > > Possibly thish should be also right action for other errors than
> > > > "insufficient entropy" but I'm open for comments for this.
> > >
> > > Or actually actually I take one step back with my suggestions
> > > because this really should be a question for which I don't have
> > > the definitive answer.
> > >
> > > The current code works like this: if anything that we don't
> > > like happens, we re-iterate.
> > >
> > > Should some of the "exceptional conditions" have a different
> > > recovery or not?
> >
> > None of these exceptional conditions are fatal or present an
> > immediate danger to the system security. So, allowing the re-tries
> > seems logical in this case. In case re-tries also fail, the system
> > admin will have an option of gracefully shutting down all enclaves
> > and doing either a full reboot (if SVN is the only concern) or other
> > necessary actions like taking the physical node out of use, etc.
> >
> > Does this sound reasonable?
> 
> Uknown error I don't think would hold that premise.

True, unknown is an unknown ))
But unknown errors should not happen (per SGX spec), and the
current SGX kernel code does not handle such errors in any other way
than notifying that operation failed for other ENCLS leaves. So, I don't
see why ENCLS[EUPDATESVN] should be different from existing behaviour?

Best Regards,
Elena.

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 1 week ago

On Tue, Apr 01, 2025 at 09:35:33AM +0000, Reshetova, Elena wrote:
> > > None of these exceptional conditions are fatal or present an
> > > immediate danger to the system security. So, allowing the re-tries
> > > seems logical in this case. In case re-tries also fail, the system
> > > admin will have an option of gracefully shutting down all enclaves
> > > and doing either a full reboot (if SVN is the only concern) or other
> > > necessary actions like taking the physical node out of use, etc.
> > >
> > > Does this sound reasonable?
> > 
> > Uknown error I don't think would hold that premise.
> 
> True, unknown is an unknown ))
> But unknown errors should not happen (per SGX spec), and the

Thus if for some reason unknown error code would be returned something
would be horribly wrong (e.g. bad emulation of the opcode or who knows
what) and thus it would make sense disable the driver if this happens.

Or maybe even BUG_ON() in this situation?

> current SGX kernel code does not handle such errors in any other way
> than notifying that operation failed for other ENCLS leaves. So, I don't
> see why ENCLS[EUPDATESVN] should be different from existing behaviour?

While not disagreeing fully (it depends on call site), in some
situations it is more difficult to take more preventive actions.

This is a situation where we know that there are *zero* EPC pages in
traffic so it is relatively easy to stop the madness, isn't it?

I guess the best action would be make sgx_alloc_epc_page() return
consistently -ENOMEM, if the unexpected happens.

/* <- this
 * Do not execute ENCLS[EUPDATESVN] if running in a VM since
 * microcode updates are only meaningful to be applied on the host.
 */

According to https://www.kernel.org/doc/Documentation/kernel-doc-nano-HOWTO.txt

> 
> Best Regards,
> Elena.
> 
> 

BR, Jarkko

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months, 1 week ago

> On Tue, Apr 01, 2025 at 09:35:33AM +0000, Reshetova, Elena wrote:
> > > > None of these exceptional conditions are fatal or present an
> > > > immediate danger to the system security. So, allowing the re-tries
> > > > seems logical in this case. In case re-tries also fail, the system
> > > > admin will have an option of gracefully shutting down all enclaves
> > > > and doing either a full reboot (if SVN is the only concern) or other
> > > > necessary actions like taking the physical node out of use, etc.
> > > >
> > > > Does this sound reasonable?
> > >
> > > Uknown error I don't think would hold that premise.
> >
> > True, unknown is an unknown ))
> > But unknown errors should not happen (per SGX spec), and the
> 
> Thus if for some reason unknown error code would be returned something
> would be horribly wrong (e.g. bad emulation of the opcode or who knows
> what) and thus it would make sense disable the driver if this happens.

Again, this is just unknown code with regards to this operation, EUDAPSTESVN,
and while yes it should not happen, there is no indicator that smth else is
definitely broken with the exception of EUPDATESVN functionality. 

> 
> Or maybe even BUG_ON() in this situation?

I think there is a high bar in the kernel for using BUG_ON() and broken
SGX code is likely not reaching this bar: the rest of kernel is definitely ok
in this situation so at max it should be WARN_ON(). 

> 
> > current SGX kernel code does not handle such errors in any other way
> > than notifying that operation failed for other ENCLS leaves. So, I don't
> > see why ENCLS[EUPDATESVN] should be different from existing behaviour?
> 
> While not disagreeing fully (it depends on call site), in some
> situations it is more difficult to take more preventive actions.
> 
> This is a situation where we know that there are *zero* EPC pages in
> traffic so it is relatively easy to stop the madness, isn't it?
> 
> I guess the best action would be make sgx_alloc_epc_page() return
> consistently -ENOMEM, if the unexpected happens.

But this would be very misleading imo. We do have memory, even page
allocation might function as normal in EPC, the only thing that is broken
can be EUPDATESVN functionality. Returning -ENOMEM in this case seems
wrong.

> 
> /* <- this
>  * Do not execute ENCLS[EUPDATESVN] if running in a VM since
>  * microcode updates are only meaningful to be applied on the host.
>  */
> 
> According to https://www.kernel.org/doc/Documentation/kernel-doc-nano-
> HOWTO.txt

Sure, will fix in the next version. Thanks for catching!

Best Regards,
Elena.


> 
> >
> > Best Regards,
> > Elena.
> >
> >
> 
> BR, Jarkko

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 1 week ago

On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > current SGX kernel code does not handle such errors in any other way
> > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > see why ENCLS[EUPDATESVN] should be different from existing behaviour?
> > 
> > While not disagreeing fully (it depends on call site), in some
> > situations it is more difficult to take more preventive actions.
> > 
> > This is a situation where we know that there are *zero* EPC pages in
> > traffic so it is relatively easy to stop the madness, isn't it?
> > 
> > I guess the best action would be make sgx_alloc_epc_page() return
> > consistently -ENOMEM, if the unexpected happens.
> 
> But this would be very misleading imo. We do have memory, even page
> allocation might function as normal in EPC, the only thing that is broken
> can be EUPDATESVN functionality. Returning -ENOMEM in this case seems
> wrong.

This makes it not misleading at all:

	pr_err("EUPDATESVN: unknown error %d\n", ret);

Since hardware should never return this, it indicates a kernel bug.

BR, Jarkko

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months, 1 week ago

> On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > > current SGX kernel code does not handle such errors in any other way
> > > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > > see why ENCLS[EUPDATESVN] should be different from existing
> behaviour?
> > >
> > > While not disagreeing fully (it depends on call site), in some
> > > situations it is more difficult to take more preventive actions.
> > >
> > > This is a situation where we know that there are *zero* EPC pages in
> > > traffic so it is relatively easy to stop the madness, isn't it?
> > >
> > > I guess the best action would be make sgx_alloc_epc_page() return
> > > consistently -ENOMEM, if the unexpected happens.
> >
> > But this would be very misleading imo. We do have memory, even page
> > allocation might function as normal in EPC, the only thing that is broken
> > can be EUPDATESVN functionality. Returning -ENOMEM in this case seems
> > wrong.
> 
> This makes it not misleading at all:
> 
> 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> 
> Since hardware should never return this, it indicates a kernel bug.

OK, so you propose in this case to print the above message, sgx_updatesvn
returning an error, and then NULL from __sgx_alloc_epc_page_from_node and
the __sgx_alloc_epc_page returning -ENOMEM after an iteration over
a whole set of numa nodes given that we will keep getting the unknown error
on each node upon trying to do an allocation from each one?

Best Regards,
Elena.

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 1 week ago

On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > > > current SGX kernel code does not handle such errors in any other way
> > > > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > > > see why ENCLS[EUPDATESVN] should be different from existing
> > behaviour?
> > > >
> > > > While not disagreeing fully (it depends on call site), in some
> > > > situations it is more difficult to take more preventive actions.
> > > >
> > > > This is a situation where we know that there are *zero* EPC pages in
> > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > >
> > > > I guess the best action would be make sgx_alloc_epc_page() return
> > > > consistently -ENOMEM, if the unexpected happens.
> > >
> > > But this would be very misleading imo. We do have memory, even page
> > > allocation might function as normal in EPC, the only thing that is broken
> > > can be EUPDATESVN functionality. Returning -ENOMEM in this case seems
> > > wrong.
> > 
> > This makes it not misleading at all:
> > 
> > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > 
> > Since hardware should never return this, it indicates a kernel bug.
> 
> OK, so you propose in this case to print the above message, sgx_updatesvn
> returning an error, and then NULL from __sgx_alloc_epc_page_from_node and
> the __sgx_alloc_epc_page returning -ENOMEM after an iteration over
> a whole set of numa nodes given that we will keep getting the unknown error
> on each node upon trying to do an allocation from each one?

I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
check. Should not ever happen, but if e.g., a new kernel patch breaks
anything, it could help catching issues.

We are talking here about situation that is never expected to happen so I
don't think it is too heavy hammer here. Here it makes sense because not
much effort is required to implement the counter-measures.

> 
> Best Regards,
> Elena. 
> 
> 
> 

BR, Jarkko

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months, 1 week ago

> On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > > > > current SGX kernel code does not handle such errors in any other
> way
> > > > > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > > > > see why ENCLS[EUPDATESVN] should be different from existing
> > > behaviour?
> > > > >
> > > > > While not disagreeing fully (it depends on call site), in some
> > > > > situations it is more difficult to take more preventive actions.
> > > > >
> > > > > This is a situation where we know that there are *zero* EPC pages in
> > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > >
> > > > > I guess the best action would be make sgx_alloc_epc_page() return
> > > > > consistently -ENOMEM, if the unexpected happens.
> > > >
> > > > But this would be very misleading imo. We do have memory, even page
> > > > allocation might function as normal in EPC, the only thing that is broken
> > > > can be EUPDATESVN functionality. Returning -ENOMEM in this case
> seems
> > > > wrong.
> > >
> > > This makes it not misleading at all:
> > >
> > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > >
> > > Since hardware should never return this, it indicates a kernel bug.
> >
> > OK, so you propose in this case to print the above message, sgx_updatesvn
> > returning an error, and then NULL from __sgx_alloc_epc_page_from_node
> and
> > the __sgx_alloc_epc_page returning -ENOMEM after an iteration over
> > a whole set of numa nodes given that we will keep getting the unknown
> error
> > on each node upon trying to do an allocation from each one?
> 
> I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
> check. Should not ever happen, but if e.g., a new kernel patch breaks
> anything, it could help catching issues.
> 
> We are talking here about situation that is never expected to happen so I
> don't think it is too heavy hammer here. Here it makes sense because not
> much effort is required to implement the counter-measures.

OK, but does it really make sense to explicitly disable ioctls? 
Note that everything *in practice* will be disabled simply because not a single page
anymore can be allocated from EPC since we are getting -ENOMEM on EPC
page allocation. Also, note that any approach we chose should be symmetrical
to SGX virtualization side also, which doesn’t use ioctls at all. Simply returning
-ENOMEM for page allocation in EPC seems like a correct symmetrical solution
that would work for both nativel enclaves and EPC pages allocated for VMs.
And nothing would  be able to proceed creating/managing enclaves at this point. 

Best Regards,
Elena.

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Huang, Kai 10 months, 1 week ago

On Mon, 2025-04-07 at 08:23 +0000, Reshetova, Elena wrote:
> > On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > > > > > current SGX kernel code does not handle such errors in any other
> > way
> > > > > > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > > > > > see why ENCLS[EUPDATESVN] should be different from existing
> > > > behaviour?
> > > > > > 
> > > > > > While not disagreeing fully (it depends on call site), in some
> > > > > > situations it is more difficult to take more preventive actions.
> > > > > > 
> > > > > > This is a situation where we know that there are *zero* EPC pages in
> > > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > > > 
> > > > > > I guess the best action would be make sgx_alloc_epc_page() return
> > > > > > consistently -ENOMEM, if the unexpected happens.
> > > > > 
> > > > > But this would be very misleading imo. We do have memory, even page
> > > > > allocation might function as normal in EPC, the only thing that is broken
> > > > > can be EUPDATESVN functionality. Returning -ENOMEM in this case
> > seems
> > > > > wrong.
> > > > 
> > > > This makes it not misleading at all:
> > > > 
> > > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > 
> > > > Since hardware should never return this, it indicates a kernel bug.
> > > 
> > > OK, so you propose in this case to print the above message, sgx_updatesvn
> > > returning an error, and then NULL from __sgx_alloc_epc_page_from_node
> > and
> > > the __sgx_alloc_epc_page returning -ENOMEM after an iteration over
> > > a whole set of numa nodes given that we will keep getting the unknown
> > error
> > > on each node upon trying to do an allocation from each one?
> > 
> > I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
> > check. Should not ever happen, but if e.g., a new kernel patch breaks
> > anything, it could help catching issues.
> > 
> > We are talking here about situation that is never expected to happen so I
> > don't think it is too heavy hammer here. Here it makes sense because not
> > much effort is required to implement the counter-measures.
> 
> OK, but does it really make sense to explicitly disable ioctls? 
> Note that everything *in practice* will be disabled simply because not a single page
> anymore can be allocated from EPC since we are getting -ENOMEM on EPC
> page allocation. Also, note that any approach we chose should be symmetrical
> to SGX virtualization side also, which doesn’t use ioctls at all. Simply returning
> -ENOMEM for page allocation in EPC seems like a correct symmetrical solution
> that would work for both nativel enclaves and EPC pages allocated for VMs.
> And nothing would  be able to proceed creating/managing enclaves at this point. 
> 

Right, failing ioctls() doesn't cover SGX virtualization.  If we ever want to
fail, we should fail the EPC allocation.

Btw, for the unknown error, and any other errors which should not happen,
couldn't we use the ENCLS_WARN()?  AFAICT there are already cases that we are
using ENCLS_WARN() for those "impossible-to-happen-errors".

E.g., in __sgx_encl_extend():

	        ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
                                sgx_get_epc_virt_addr(epc_page) + offset);
                if (ret) {
                        if (encls_failed(ret))
                                ENCLS_WARN(ret, "EEXTEND");
   
                        return -EIO;
                }

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 1 week ago

On Tue, Apr 08, 2025 at 12:06:32AM +0000, Huang, Kai wrote:
> On Mon, 2025-04-07 at 08:23 +0000, Reshetova, Elena wrote:
> > > On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > > > > > > current SGX kernel code does not handle such errors in any other
> > > way
> > > > > > > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > > > > > > see why ENCLS[EUPDATESVN] should be different from existing
> > > > > behaviour?
> > > > > > > 
> > > > > > > While not disagreeing fully (it depends on call site), in some
> > > > > > > situations it is more difficult to take more preventive actions.
> > > > > > > 
> > > > > > > This is a situation where we know that there are *zero* EPC pages in
> > > > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > > > > 
> > > > > > > I guess the best action would be make sgx_alloc_epc_page() return
> > > > > > > consistently -ENOMEM, if the unexpected happens.
> > > > > > 
> > > > > > But this would be very misleading imo. We do have memory, even page
> > > > > > allocation might function as normal in EPC, the only thing that is broken
> > > > > > can be EUPDATESVN functionality. Returning -ENOMEM in this case
> > > seems
> > > > > > wrong.
> > > > > 
> > > > > This makes it not misleading at all:
> > > > > 
> > > > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > > 
> > > > > Since hardware should never return this, it indicates a kernel bug.
> > > > 
> > > > OK, so you propose in this case to print the above message, sgx_updatesvn
> > > > returning an error, and then NULL from __sgx_alloc_epc_page_from_node
> > > and
> > > > the __sgx_alloc_epc_page returning -ENOMEM after an iteration over
> > > > a whole set of numa nodes given that we will keep getting the unknown
> > > error
> > > > on each node upon trying to do an allocation from each one?
> > > 
> > > I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
> > > check. Should not ever happen, but if e.g., a new kernel patch breaks
> > > anything, it could help catching issues.
> > > 
> > > We are talking here about situation that is never expected to happen so I
> > > don't think it is too heavy hammer here. Here it makes sense because not
> > > much effort is required to implement the counter-measures.
> > 
> > OK, but does it really make sense to explicitly disable ioctls? 
> > Note that everything *in practice* will be disabled simply because not a single page
> > anymore can be allocated from EPC since we are getting -ENOMEM on EPC
> > page allocation. Also, note that any approach we chose should be symmetrical
> > to SGX virtualization side also, which doesn´t use ioctls at all. Simply returning
> > -ENOMEM for page allocation in EPC seems like a correct symmetrical solution
> > that would work for both nativel enclaves and EPC pages allocated for VMs.
> > And nothing would  be able to proceed creating/managing enclaves at this point. 
> > 
> 
> Right, failing ioctls() doesn't cover SGX virtualization.  If we ever want to
> fail, we should fail the EPC allocation.

"I guess the best action would be make sgx_alloc_epc_page() return
 consistently -ENOMEM, if the unexpected happens." -me

> 
> Btw, for the unknown error, and any other errors which should not happen,
> couldn't we use the ENCLS_WARN()?  AFAICT there are already cases that we are
> using ENCLS_WARN() for those "impossible-to-happen-errors".
> 
> E.g., in __sgx_encl_extend():
> 
> 	        ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
>                                 sgx_get_epc_virt_addr(epc_page) + offset);
>                 if (ret) {
>                         if (encls_failed(ret))
>                                 ENCLS_WARN(ret, "EEXTEND");
>    
>                         return -EIO;
>                 }

BR, Jarkko

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months, 1 week ago

On Tue, Apr 08, 2025 at 09:40:14AM +0300, Jarkko Sakkinen wrote:
> On Tue, Apr 08, 2025 at 12:06:32AM +0000, Huang, Kai wrote:
> > On Mon, 2025-04-07 at 08:23 +0000, Reshetova, Elena wrote:
> > > > On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > > > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena wrote:
> > > > > > > > > current SGX kernel code does not handle such errors in any other
> > > > way
> > > > > > > > > than notifying that operation failed for other ENCLS leaves. So, I don't
> > > > > > > > > see why ENCLS[EUPDATESVN] should be different from existing
> > > > > > behaviour?
> > > > > > > > 
> > > > > > > > While not disagreeing fully (it depends on call site), in some
> > > > > > > > situations it is more difficult to take more preventive actions.
> > > > > > > > 
> > > > > > > > This is a situation where we know that there are *zero* EPC pages in
> > > > > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > > > > > 
> > > > > > > > I guess the best action would be make sgx_alloc_epc_page() return
> > > > > > > > consistently -ENOMEM, if the unexpected happens.
> > > > > > > 
> > > > > > > But this would be very misleading imo. We do have memory, even page
> > > > > > > allocation might function as normal in EPC, the only thing that is broken
> > > > > > > can be EUPDATESVN functionality. Returning -ENOMEM in this case
> > > > seems
> > > > > > > wrong.
> > > > > > 
> > > > > > This makes it not misleading at all:
> > > > > > 
> > > > > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > > > 
> > > > > > Since hardware should never return this, it indicates a kernel bug.
> > > > > 
> > > > > OK, so you propose in this case to print the above message, sgx_updatesvn
> > > > > returning an error, and then NULL from __sgx_alloc_epc_page_from_node
> > > > and
> > > > > the __sgx_alloc_epc_page returning -ENOMEM after an iteration over
> > > > > a whole set of numa nodes given that we will keep getting the unknown
> > > > error
> > > > > on each node upon trying to do an allocation from each one?
> > > > 
> > > > I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
> > > > check. Should not ever happen, but if e.g., a new kernel patch breaks
> > > > anything, it could help catching issues.
> > > > 
> > > > We are talking here about situation that is never expected to happen so I
> > > > don't think it is too heavy hammer here. Here it makes sense because not
> > > > much effort is required to implement the counter-measures.
> > > 
> > > OK, but does it really make sense to explicitly disable ioctls? 
> > > Note that everything *in practice* will be disabled simply because not a single page
> > > anymore can be allocated from EPC since we are getting -ENOMEM on EPC
> > > page allocation. Also, note that any approach we chose should be symmetrical
> > > to SGX virtualization side also, which doesn´t use ioctls at all. Simply returning
> > > -ENOMEM for page allocation in EPC seems like a correct symmetrical solution
> > > that would work for both nativel enclaves and EPC pages allocated for VMs.
> > > And nothing would  be able to proceed creating/managing enclaves at this point. 
> > > 
> > 
> > Right, failing ioctls() doesn't cover SGX virtualization.  If we ever want to
> > fail, we should fail the EPC allocation.
> 
> "I guess the best action would be make sgx_alloc_epc_page() return
>  consistently -ENOMEM, if the unexpected happens." -me
> 
> > 
> > Btw, for the unknown error, and any other errors which should not happen,
> > couldn't we use the ENCLS_WARN()?  AFAICT there are already cases that we are
> > using ENCLS_WARN() for those "impossible-to-happen-errors".

Sorry forgot to response this. I don't have anything against this but at
minimum disabling allocation should be combined with it (in case kernel
command-line does not have oops_on_warn or whatever the option was
called).

> > 
> > E.g., in __sgx_encl_extend():
> > 
> > 	        ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
> >                                 sgx_get_epc_virt_addr(epc_page) + offset);
> >                 if (ret) {
> >                         if (encls_failed(ret))
> >                                 ENCLS_WARN(ret, "EEXTEND");
> >    
> >                         return -EIO;
> >                 }
> 
> BR, Jarkko
> 

BR, Jarkko

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months, 1 week ago

> 
> On Tue, Apr 08, 2025 at 09:40:14AM +0300, Jarkko Sakkinen wrote:
> > On Tue, Apr 08, 2025 at 12:06:32AM +0000, Huang, Kai wrote:
> > > On Mon, 2025-04-07 at 08:23 +0000, Reshetova, Elena wrote:
> > > > > On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > > > > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena
> wrote:
> > > > > > > > > > current SGX kernel code does not handle such errors in any
> other
> > > > > way
> > > > > > > > > > than notifying that operation failed for other ENCLS leaves. So,
> I don't
> > > > > > > > > > see why ENCLS[EUPDATESVN] should be different from
> existing
> > > > > > > behaviour?
> > > > > > > > >
> > > > > > > > > While not disagreeing fully (it depends on call site), in some
> > > > > > > > > situations it is more difficult to take more preventive actions.
> > > > > > > > >
> > > > > > > > > This is a situation where we know that there are *zero* EPC
> pages in
> > > > > > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > > > > > >
> > > > > > > > > I guess the best action would be make sgx_alloc_epc_page()
> return
> > > > > > > > > consistently -ENOMEM, if the unexpected happens.
> > > > > > > >
> > > > > > > > But this would be very misleading imo. We do have memory,
> even page
> > > > > > > > allocation might function as normal in EPC, the only thing that is
> broken
> > > > > > > > can be EUPDATESVN functionality. Returning -ENOMEM in this
> case
> > > > > seems
> > > > > > > > wrong.
> > > > > > >
> > > > > > > This makes it not misleading at all:
> > > > > > >
> > > > > > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > > > >
> > > > > > > Since hardware should never return this, it indicates a kernel bug.
> > > > > >
> > > > > > OK, so you propose in this case to print the above message,
> sgx_updatesvn
> > > > > > returning an error, and then NULL from
> __sgx_alloc_epc_page_from_node
> > > > > and
> > > > > > the __sgx_alloc_epc_page returning -ENOMEM after an iteration
> over
> > > > > > a whole set of numa nodes given that we will keep getting the
> unknown
> > > > > error
> > > > > > on each node upon trying to do an allocation from each one?
> > > > >
> > > > > I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
> > > > > check. Should not ever happen, but if e.g., a new kernel patch breaks
> > > > > anything, it could help catching issues.
> > > > >
> > > > > We are talking here about situation that is never expected to happen
> so I
> > > > > don't think it is too heavy hammer here. Here it makes sense because
> not
> > > > > much effort is required to implement the counter-measures.
> > > >
> > > > OK, but does it really make sense to explicitly disable ioctls?
> > > > Note that everything *in practice* will be disabled simply because not a
> single page
> > > > anymore can be allocated from EPC since we are getting -ENOMEM on
> EPC
> > > > page allocation. Also, note that any approach we chose should be
> symmetrical
> > > > to SGX virtualization side also, which doesn´t use ioctls at all. Simply
> returning
> > > > -ENOMEM for page allocation in EPC seems like a correct symmetrical
> solution
> > > > that would work for both nativel enclaves and EPC pages allocated for
> VMs.
> > > > And nothing would  be able to proceed creating/managing enclaves at
> this point.
> > > >
> > >
> > > Right, failing ioctls() doesn't cover SGX virtualization.  If we ever want to
> > > fail, we should fail the EPC allocation.
> >
> > "I guess the best action would be make sgx_alloc_epc_page() return
> >  consistently -ENOMEM, if the unexpected happens." -me
> >
> > >
> > > Btw, for the unknown error, and any other errors which should not
> happen,
> > > couldn't we use the ENCLS_WARN()?  AFAICT there are already cases that
> we are
> > > using ENCLS_WARN() for those "impossible-to-happen-errors".

Ok, so to summarise the approach I will be sending in the next version:

In case unknown error returns, issue ENCLS_WARN (uses WARN_ON underneath)
and return -ENOMEM from EPC page allocation. No other explicit ioctl disabling needed
since nothing can proceed anyhow if we cannot allocate a page from EPC.

Does this sound right? 

Best Regards,
Elena.

Re: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Jarkko Sakkinen 10 months ago

On Tue, Apr 08, 2025 at 06:54:14AM +0000, Reshetova, Elena wrote:
> > 
> > On Tue, Apr 08, 2025 at 09:40:14AM +0300, Jarkko Sakkinen wrote:
> > > On Tue, Apr 08, 2025 at 12:06:32AM +0000, Huang, Kai wrote:
> > > > On Mon, 2025-04-07 at 08:23 +0000, Reshetova, Elena wrote:
> > > > > > On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > > > > > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena
> > wrote:
> > > > > > > > > > > current SGX kernel code does not handle such errors in any
> > other
> > > > > > way
> > > > > > > > > > > than notifying that operation failed for other ENCLS leaves. So,
> > I don't
> > > > > > > > > > > see why ENCLS[EUPDATESVN] should be different from
> > existing
> > > > > > > > behaviour?
> > > > > > > > > >
> > > > > > > > > > While not disagreeing fully (it depends on call site), in some
> > > > > > > > > > situations it is more difficult to take more preventive actions.
> > > > > > > > > >
> > > > > > > > > > This is a situation where we know that there are *zero* EPC
> > pages in
> > > > > > > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > > > > > > >
> > > > > > > > > > I guess the best action would be make sgx_alloc_epc_page()
> > return
> > > > > > > > > > consistently -ENOMEM, if the unexpected happens.
> > > > > > > > >
> > > > > > > > > But this would be very misleading imo. We do have memory,
> > even page
> > > > > > > > > allocation might function as normal in EPC, the only thing that is
> > broken
> > > > > > > > > can be EUPDATESVN functionality. Returning -ENOMEM in this
> > case
> > > > > > seems
> > > > > > > > > wrong.
> > > > > > > >
> > > > > > > > This makes it not misleading at all:
> > > > > > > >
> > > > > > > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > > > > >
> > > > > > > > Since hardware should never return this, it indicates a kernel bug.
> > > > > > >
> > > > > > > OK, so you propose in this case to print the above message,
> > sgx_updatesvn
> > > > > > > returning an error, and then NULL from
> > __sgx_alloc_epc_page_from_node
> > > > > > and
> > > > > > > the __sgx_alloc_epc_page returning -ENOMEM after an iteration
> > over
> > > > > > > a whole set of numa nodes given that we will keep getting the
> > unknown
> > > > > > error
> > > > > > > on each node upon trying to do an allocation from each one?
> > > > > >
> > > > > > I'd disable ioctl's in this case and return -ENOMEM. It's a cheap sanity
> > > > > > check. Should not ever happen, but if e.g., a new kernel patch breaks
> > > > > > anything, it could help catching issues.
> > > > > >
> > > > > > We are talking here about situation that is never expected to happen
> > so I
> > > > > > don't think it is too heavy hammer here. Here it makes sense because
> > not
> > > > > > much effort is required to implement the counter-measures.
> > > > >
> > > > > OK, but does it really make sense to explicitly disable ioctls?
> > > > > Note that everything *in practice* will be disabled simply because not a
> > single page
> > > > > anymore can be allocated from EPC since we are getting -ENOMEM on
> > EPC
> > > > > page allocation. Also, note that any approach we chose should be
> > symmetrical
> > > > > to SGX virtualization side also, which doesn´t use ioctls at all. Simply
> > returning
> > > > > -ENOMEM for page allocation in EPC seems like a correct symmetrical
> > solution
> > > > > that would work for both nativel enclaves and EPC pages allocated for
> > VMs.
> > > > > And nothing would  be able to proceed creating/managing enclaves at
> > this point.
> > > > >
> > > >
> > > > Right, failing ioctls() doesn't cover SGX virtualization.  If we ever want to
> > > > fail, we should fail the EPC allocation.
> > >
> > > "I guess the best action would be make sgx_alloc_epc_page() return
> > >  consistently -ENOMEM, if the unexpected happens." -me
> > >
> > > >
> > > > Btw, for the unknown error, and any other errors which should not
> > happen,
> > > > couldn't we use the ENCLS_WARN()?  AFAICT there are already cases that
> > we are
> > > > using ENCLS_WARN() for those "impossible-to-happen-errors".
> 
> Ok, so to summarise the approach I will be sending in the next version:
> 
> In case unknown error returns, issue ENCLS_WARN (uses WARN_ON underneath)
> and return -ENOMEM from EPC page allocation. No other explicit ioctl disabling needed
> since nothing can proceed anyhow if we cannot allocate a page from EPC.
> 
> Does this sound right? 

I think it should be sufficient (not a review tho).

> 
> Best Regards,
> Elena.

BR, Jarkko

RE: [PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc

Posted by Reshetova, Elena 10 months ago

> 
> On Tue, Apr 08, 2025 at 06:54:14AM +0000, Reshetova, Elena wrote:
> > >
> > > On Tue, Apr 08, 2025 at 09:40:14AM +0300, Jarkko Sakkinen wrote:
> > > > On Tue, Apr 08, 2025 at 12:06:32AM +0000, Huang, Kai wrote:
> > > > > On Mon, 2025-04-07 at 08:23 +0000, Reshetova, Elena wrote:
> > > > > > > On Fri, Apr 04, 2025 at 06:53:17AM +0000, Reshetova, Elena wrote:
> > > > > > > > > On Wed, Apr 02, 2025 at 01:11:25PM +0000, Reshetova, Elena
> > > wrote:
> > > > > > > > > > > > current SGX kernel code does not handle such errors in
> any
> > > other
> > > > > > > way
> > > > > > > > > > > > than notifying that operation failed for other ENCLS
> leaves. So,
> > > I don't
> > > > > > > > > > > > see why ENCLS[EUPDATESVN] should be different from
> > > existing
> > > > > > > > > behaviour?
> > > > > > > > > > >
> > > > > > > > > > > While not disagreeing fully (it depends on call site), in some
> > > > > > > > > > > situations it is more difficult to take more preventive
> actions.
> > > > > > > > > > >
> > > > > > > > > > > This is a situation where we know that there are *zero* EPC
> > > pages in
> > > > > > > > > > > traffic so it is relatively easy to stop the madness, isn't it?
> > > > > > > > > > >
> > > > > > > > > > > I guess the best action would be make sgx_alloc_epc_page()
> > > return
> > > > > > > > > > > consistently -ENOMEM, if the unexpected happens.
> > > > > > > > > >
> > > > > > > > > > But this would be very misleading imo. We do have memory,
> > > even page
> > > > > > > > > > allocation might function as normal in EPC, the only thing that
> is
> > > broken
> > > > > > > > > > can be EUPDATESVN functionality. Returning -ENOMEM in
> this
> > > case
> > > > > > > seems
> > > > > > > > > > wrong.
> > > > > > > > >
> > > > > > > > > This makes it not misleading at all:
> > > > > > > > >
> > > > > > > > > 	pr_err("EUPDATESVN: unknown error %d\n", ret);
> > > > > > > > >
> > > > > > > > > Since hardware should never return this, it indicates a kernel
> bug.
> > > > > > > >
> > > > > > > > OK, so you propose in this case to print the above message,
> > > sgx_updatesvn
> > > > > > > > returning an error, and then NULL from
> > > __sgx_alloc_epc_page_from_node
> > > > > > > and
> > > > > > > > the __sgx_alloc_epc_page returning -ENOMEM after an iteration
> > > over
> > > > > > > > a whole set of numa nodes given that we will keep getting the
> > > unknown
> > > > > > > error
> > > > > > > > on each node upon trying to do an allocation from each one?
> > > > > > >
> > > > > > > I'd disable ioctl's in this case and return -ENOMEM. It's a cheap
> sanity
> > > > > > > check. Should not ever happen, but if e.g., a new kernel patch
> breaks
> > > > > > > anything, it could help catching issues.
> > > > > > >
> > > > > > > We are talking here about situation that is never expected to
> happen
> > > so I
> > > > > > > don't think it is too heavy hammer here. Here it makes sense
> because
> > > not
> > > > > > > much effort is required to implement the counter-measures.
> > > > > >
> > > > > > OK, but does it really make sense to explicitly disable ioctls?
> > > > > > Note that everything *in practice* will be disabled simply because
> not a
> > > single page
> > > > > > anymore can be allocated from EPC since we are getting -ENOMEM
> on
> > > EPC
> > > > > > page allocation. Also, note that any approach we chose should be
> > > symmetrical
> > > > > > to SGX virtualization side also, which doesn´t use ioctls at all. Simply
> > > returning
> > > > > > -ENOMEM for page allocation in EPC seems like a correct
> symmetrical
> > > solution
> > > > > > that would work for both nativel enclaves and EPC pages allocated
> for
> > > VMs.
> > > > > > And nothing would  be able to proceed creating/managing enclaves
> at
> > > this point.
> > > > > >
> > > > >
> > > > > Right, failing ioctls() doesn't cover SGX virtualization.  If we ever want
> to
> > > > > fail, we should fail the EPC allocation.
> > > >
> > > > "I guess the best action would be make sgx_alloc_epc_page() return
> > > >  consistently -ENOMEM, if the unexpected happens." -me
> > > >
> > > > >
> > > > > Btw, for the unknown error, and any other errors which should not
> > > happen,
> > > > > couldn't we use the ENCLS_WARN()?  AFAICT there are already cases
> that
> > > we are
> > > > > using ENCLS_WARN() for those "impossible-to-happen-errors".
> >
> > Ok, so to summarise the approach I will be sending in the next version:
> >
> > In case unknown error returns, issue ENCLS_WARN (uses WARN_ON
> underneath)
> > and return -ENOMEM from EPC page allocation. No other explicit ioctl
> disabling needed
> > since nothing can proceed anyhow if we cannot allocate a page from EPC.
> >
> > Does this sound right?
> 
> I think it should be sufficient (not a review tho).

Sounds good, thank you! I will respin a new version soon. 

Best Regards,
Elena.

[PATCH v2 1/2] x86/sgx: Use sgx_nr_used_pages for EPC page count instead of sgx_nr_free_pages
[PATCH v2 2/2] x86/sgx: Implement EUPDATESVN and opportunistically call it during first EPC page alloc