[PATCH v4 5/6] crypto: ccp: Use firmware_upload API for SNP firmware

Dionna Glaze posted 6 patches 2 weeks, 5 days ago
[PATCH v4 5/6] crypto: ccp: Use firmware_upload API for SNP firmware
Posted by Dionna Glaze 2 weeks, 5 days ago
On init, the ccp device will make /sys/class/firmware/amd/loading etc
firmware upload API attributes available to late-load a SEV-SNP firmware
binary.

The firmware upload api errors reported are actionable in the following
ways:
* FW_UPLOAD_ERR_HW_ERROR: the machine is in an unstable state and must
  be reset.
* FW_UPLOAD_ERR_RW_ERROR: the firmware update went bad but can be
  recovered by hotloading the previous firmware version.
  Also used in the case that the kernel used the API wrong (bug).
* FW_UPLOAD_ERR_FW_INVALID: user error with the data provided, but no
  instability is expected and no recovery actions are needed.
* FW_UPLOAD_ERR_BUSY: upload attempted at a bad time either due to
  overload or the machine is in the wrong platform state.

synthetic_restore_required:
Instead of tracking the status of whether an individual GCTX is safe for
use in a firmware command, force all following commands to fail with an
error that is indicative of needing a firmware rollback.

To test:
1. Build the kernel enabling SEV-SNP as normal and add CONFIG_FW_UPLOAD=y.
2. Add the following to your kernel_cmdline: ccp.psp_init_on_probe=0.
3.Get an AMD SEV-SNP firmware sbin appropriate to your Epyc chip model at
https://www.amd.com/en/developer/sev.html and extract to get a .sbin
file.
4. Run the following with your sbinfile in FW:

echo 1 > /sys/class/firmware/snp_dlfw_ex/loading
cat "${FW?}" > /sys/class/firmware/snp_dlfw_ex/data
echo 0 > /sys/class/firmware/snp_dlfw_ex/loading

5. Verify the firmware update message in dmesg.

Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Tested-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Dionna Glaze <dionnaglaze@google.com>
---
 drivers/crypto/ccp/Kconfig   |   2 +
 drivers/crypto/ccp/sev-dev.c |  12 +-
 drivers/crypto/ccp/sev-dev.h |  16 +++
 drivers/crypto/ccp/sev-fw.c  | 213 +++++++++++++++++++++++++++++++++++
 4 files changed, 241 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index f394e45e11ab4..520b1c84d11f4 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -39,6 +39,8 @@ config CRYPTO_DEV_SP_PSP
 	bool "Platform Security Processor (PSP) device"
 	default y
 	depends on CRYPTO_DEV_CCP_DD && X86_64 && AMD_IOMMU
+	select FW_LOADER
+	select FW_UPLOAD
 	help
 	 Provide support for the AMD Platform Security Processor (PSP).
 	 The PSP is a dedicated processor that provides support for key
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 32f7b6147905e..8c73f023b6420 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -485,7 +485,7 @@ void snp_free_firmware_page(void *addr)
 }
 EXPORT_SYMBOL_GPL(snp_free_firmware_page);
 
-static void *sev_fw_alloc(unsigned long len)
+void *sev_fw_alloc(unsigned long len)
 {
 	struct page *page;
 
@@ -853,6 +853,10 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
 	if (WARN_ON_ONCE(!data != !buf_len))
 		return -EINVAL;
 
+	ret = sev_snp_synthetic_error(sev, psp_ret);
+	if (ret)
+		return ret;
+
 	/*
 	 * Copy the incoming data to driver's scratch buffer as __pa() will not
 	 * work for some memory, e.g. vmalloc'd addresses, and @data may not be
@@ -1523,7 +1527,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len)
 }
 EXPORT_SYMBOL_GPL(psp_copy_user_blob);
 
-static int sev_get_api_version(void)
+int sev_get_api_version(void)
 {
 	struct sev_device *sev = psp_master->sev_data;
 	struct sev_user_data_status status;
@@ -2320,6 +2324,8 @@ int sev_dev_init(struct psp_device *psp)
 	if (ret)
 		goto e_irq;
 
+	sev_snp_dev_init_firmware_upload(sev);
+
 	dev_notice(dev, "sev enabled\n");
 
 	return 0;
@@ -2398,6 +2404,8 @@ void sev_dev_destroy(struct psp_device *psp)
 		kref_put(&misc_dev->refcount, sev_exit);
 
 	psp_clear_sev_irq_handler(psp);
+
+	sev_snp_dev_init_firmware_upload(sev);
 }
 
 static int snp_shutdown_on_panic(struct notifier_block *nb,
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
index 28add34484ed1..52a423e7df84f 100644
--- a/drivers/crypto/ccp/sev-dev.h
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -59,7 +59,13 @@ struct sev_device {
 	bool snp_initialized;
 
 #ifdef CONFIG_FW_UPLOAD
+	/* Lock to protect fw_cancel */
+	struct mutex fw_lock;
+	struct fw_upload *fwl;
+	bool fw_cancel;
+
 	u32 last_snp_asid;
+	bool synthetic_restore_required;
 	u64 *snp_asid_to_gctx_pages_map;
 	u64 *snp_unbound_gctx_pages;
 	u32 snp_unbound_gctx_end;
@@ -72,12 +78,22 @@ void sev_dev_destroy(struct psp_device *psp);
 void sev_pci_init(void);
 void sev_pci_exit(void);
 
+void *sev_fw_alloc(unsigned long len);
+int sev_get_api_version(void);
+int sev_snp_download_firmware_ex(struct sev_device *sev, const u8 *data, u32 size, int *error);
+
 #ifdef CONFIG_FW_UPLOAD
 void snp_cmd_bookkeeping_locked(int cmd, struct sev_device *sev, void *data);
 int sev_snp_platform_init_firmware_upload(struct sev_device *sev);
+void sev_snp_dev_init_firmware_upload(struct sev_device *sev);
+void sev_snp_destroy_firmware_upload(struct sev_device *sev);
+int sev_snp_synthetic_error(struct sev_device *sev, int *psp_ret);
 #else
 static inline void snp_cmd_bookkeeping_locked(int cmd, struct sev_device *sev, void *data) { }
 static inline int sev_snp_platform_init_firmware_upload(struct sev_device *sev) { return 0; }
+static inline void sev_snp_dev_init_firmware_upload(struct sev_device *sev) { }
+static inline void sev_snp_destroy_firmware_upload(struct sev_device *sev) { }
+static inline int sev_snp_synthetic_error(struct sev_device *sev, int *psp_ret) { return 0; }
 #endif /* CONFIG_FW_UPLOAD */
 
 #endif /* __SEV_DEV_H */
diff --git a/drivers/crypto/ccp/sev-fw.c b/drivers/crypto/ccp/sev-fw.c
index 55a5a572da8f1..97ce90f2af29a 100644
--- a/drivers/crypto/ccp/sev-fw.c
+++ b/drivers/crypto/ccp/sev-fw.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/firmware.h>
+#include <linux/psp.h>
 #include <linux/psp-sev.h>
 
 #include <asm/sev.h>
@@ -115,3 +116,215 @@ int sev_snp_platform_init_firmware_upload(struct sev_device *sev)
 	}
 	return 0;
 }
+
+static enum fw_upload_err snp_dlfw_ex_prepare(struct fw_upload *fw_upload,
+					      const u8 *data, u32 size)
+{
+	return FW_UPLOAD_ERR_NONE;
+}
+
+static enum fw_upload_err snp_dlfw_ex_poll_complete(struct fw_upload *fw_upload)
+{
+	return FW_UPLOAD_ERR_NONE;
+}
+
+/*
+ * This may be called asynchronously with an on-going update.  All other
+ * functions are called sequentially in a single thread. To avoid contention on
+ * register accesses, only update the cancel_request flag. Other functions will
+ * check this flag and handle the cancel request synchronously.
+ */
+static void snp_dlfw_ex_cancel(struct fw_upload *fw_upload)
+{
+	struct sev_device *sev = fw_upload->dd_handle;
+
+	mutex_lock(&sev->fw_lock);
+	sev->fw_cancel = true;
+	mutex_unlock(&sev->fw_lock);
+}
+
+static enum fw_upload_err snp_dlfw_ex_err_translate(struct sev_device *sev, int psp_ret)
+{
+	dev_dbg(sev->dev, "Failed to update SEV firmware: %#x\n", psp_ret);
+	/*
+	 * Operation error:
+	 *   HW_ERROR: Critical error. Machine needs repairs now.
+	 *   RW_ERROR: Severe error. Roll back to the prior version to recover.
+	 * User error:
+	 *   FW_INVALID: Bad input for this interface.
+	 *   BUSY: Wrong machine state to run download_firmware_ex.
+	 */
+	switch (psp_ret) {
+	case SEV_RET_RESTORE_REQUIRED:
+		dev_warn(sev->dev, "Firmware updated but unusable\n");
+		dev_warn(sev->dev, "Need to do manual firmware rollback!!!\n");
+		return FW_UPLOAD_ERR_RW_ERROR;
+	case SEV_RET_SHUTDOWN_REQUIRED:
+		/* No state changes made. Not a hardware error. */
+		dev_warn(sev->dev, "Firmware image cannot be live updated\n");
+		return FW_UPLOAD_ERR_FW_INVALID;
+	case SEV_RET_BAD_VERSION:
+		/* No state changes made. Not a hardware error. */
+		dev_warn(sev->dev, "Firmware image is not well formed\n");
+		return FW_UPLOAD_ERR_FW_INVALID;
+		/* SEV-specific errors that can still happen. */
+	case SEV_RET_BAD_SIGNATURE:
+		/* No state changes made. Not a hardware error. */
+		dev_warn(sev->dev, "Firmware image signature is bad\n");
+		return FW_UPLOAD_ERR_FW_INVALID;
+	case SEV_RET_INVALID_PLATFORM_STATE:
+		/* Calling at the wrong time. Not a hardware error. */
+		dev_warn(sev->dev, "Firmware not updated as SEV in INIT state\n");
+		return FW_UPLOAD_ERR_BUSY;
+	case SEV_RET_HWSEV_RET_UNSAFE:
+		dev_err(sev->dev, "Firmware is unstable. Reset your machine!!!\n");
+		return FW_UPLOAD_ERR_HW_ERROR;
+		/* Kernel bug cases. */
+	case SEV_RET_INVALID_PARAM:
+		dev_err(sev->dev, "Download-firmware-EX invalid parameter\n");
+		return FW_UPLOAD_ERR_RW_ERROR;
+	case SEV_RET_INVALID_ADDRESS:
+		dev_err(sev->dev, "Download-firmware-EX invalid address\n");
+		return FW_UPLOAD_ERR_RW_ERROR;
+	default:
+		dev_err(sev->dev, "Unhandled download_firmware_ex err %d\n", psp_ret);
+		return FW_UPLOAD_ERR_HW_ERROR;
+	}
+}
+
+static enum fw_upload_err snp_update_guest_statuses(struct sev_device *sev)
+{
+	struct sev_data_snp_guest_status status_data;
+	void *snp_guest_status;
+	enum fw_upload_err ret;
+	int error;
+
+	/*
+	 * Force an update of guest context pages after SEV firmware
+	 * live update by issuing SNP_GUEST_STATUS on all guest
+	 * context pages.
+	 */
+	snp_guest_status = sev_fw_alloc(PAGE_SIZE);
+	if (!snp_guest_status)
+		return FW_UPLOAD_ERR_INVALID_SIZE;
+
+	/*
+	 * After the last bound asid-to-gctx page is snp_unbound_gctx_end-many
+	 * unbound gctx pages that also need updating.
+	 */
+	for (int i = 1; i <= sev->last_snp_asid + sev->snp_unbound_gctx_end; i++) {
+		if (sev->snp_asid_to_gctx_pages_map[i]) {
+			status_data.gctx_paddr = sev->snp_asid_to_gctx_pages_map[i];
+			status_data.address = __psp_pa(snp_guest_status);
+			ret = sev_do_cmd(SEV_CMD_SNP_GUEST_STATUS, &status_data, &error);
+			if (ret) {
+				/*
+				 * Handle race with SNP VM being destroyed/decommissoned,
+				 * if guest context page invalid error is returned,
+				 * assume guest has been destroyed.
+				 */
+				if (error == SEV_RET_INVALID_GUEST)
+					continue;
+				sev->synthetic_restore_required = true;
+				dev_err(sev->dev, "SNP GCTX update error: %#x\n", error);
+				dev_err(sev->dev, "Roll back SNP firmware!\n");
+				snp_free_firmware_page(snp_guest_status);
+				ret = FW_UPLOAD_ERR_RW_ERROR;
+				goto fw_err;
+			}
+		}
+	}
+fw_err:
+	snp_free_firmware_page(snp_guest_status);
+	return ret;
+}
+
+static enum fw_upload_err snp_dlfw_ex_write(struct fw_upload *fwl, const u8 *data,
+					    u32 offset, u32 size, u32 *written)
+{
+	struct sev_device *sev = fwl->dd_handle;
+	u8 api_major, api_minor, build;
+	int ret, error;
+	bool cancel;
+
+	if (!sev)
+		return FW_UPLOAD_ERR_HW_ERROR;
+
+	mutex_lock(&sev->fw_lock);
+	cancel = sev->fw_cancel;
+	mutex_unlock(&sev->fw_lock);
+
+	if (cancel)
+		return FW_UPLOAD_ERR_CANCELED;
+
+	/*
+	 * SEV firmware update is a one-shot update operation, the write()
+	 * callback to be invoked multiple times for the same update is
+	 * unexpected.
+	 */
+	if (offset)
+		return FW_UPLOAD_ERR_INVALID_SIZE;
+
+	if (sev_get_api_version())
+		return FW_UPLOAD_ERR_HW_ERROR;
+
+	api_major = sev->api_major;
+	api_minor = sev->api_minor;
+	build     = sev->build;
+
+	ret = sev_snp_download_firmware_ex(sev, data, size, &error);
+	if (ret)
+		return snp_dlfw_ex_err_translate(sev, error);
+
+	ret = snp_update_guest_statuses(sev);
+	if (ret)
+		return ret;
+
+	sev_get_api_version();
+	if (api_major != sev->api_major || api_minor != sev->api_minor ||
+	    build != sev->build) {
+		dev_info(sev->dev, "SEV firmware updated from %d.%d.%d to %d.%d.%d\n",
+			 api_major, api_minor, build,
+			 sev->api_major, sev->api_minor, sev->build);
+	} else {
+		dev_info(sev->dev, "SEV firmware same as old %d.%d.%d\n",
+			 api_major, api_minor, build);
+	}
+
+	*written = size;
+	return FW_UPLOAD_ERR_NONE;
+}
+
+static const struct fw_upload_ops snp_dlfw_ex_ops = {
+	.prepare = snp_dlfw_ex_prepare,
+	.write = snp_dlfw_ex_write,
+	.poll_complete = snp_dlfw_ex_poll_complete,
+	.cancel = snp_dlfw_ex_cancel,
+};
+
+void sev_snp_dev_init_firmware_upload(struct sev_device *sev)
+{
+	struct fw_upload *fwl;
+
+	fwl = firmware_upload_register(THIS_MODULE, sev->dev, "snp_dlfw_ex", &snp_dlfw_ex_ops, sev);
+
+	if (IS_ERR(fwl))
+		dev_err(sev->dev, "SEV firmware upload initialization error %ld\n", PTR_ERR(fwl));
+
+	sev->fwl = fwl;
+	mutex_init(&sev->fw_lock);
+}
+
+void sev_snp_destroy_firmware_upload(struct sev_device *sev)
+{
+	firmware_upload_unregister(sev->fwl);
+}
+
+int sev_snp_synthetic_error(struct sev_device *sev, int *psp_ret)
+{
+	if (sev->synthetic_restore_required) {
+		*psp_ret = SEV_RET_RESTORE_REQUIRED;
+		return -EIO;
+	}
+	return 0;
+}
-- 
2.47.0.199.ga7371fff76-goog
Re: [PATCH v4 5/6] crypto: ccp: Use firmware_upload API for SNP firmware
Posted by Tom Lendacky 2 weeks, 4 days ago
On 11/4/24 19:05, Dionna Glaze wrote:
> On init, the ccp device will make /sys/class/firmware/amd/loading etc
> firmware upload API attributes available to late-load a SEV-SNP firmware
> binary.
> 
> The firmware upload api errors reported are actionable in the following
> ways:
> * FW_UPLOAD_ERR_HW_ERROR: the machine is in an unstable state and must
>   be reset.
> * FW_UPLOAD_ERR_RW_ERROR: the firmware update went bad but can be
>   recovered by hotloading the previous firmware version.
>   Also used in the case that the kernel used the API wrong (bug).
> * FW_UPLOAD_ERR_FW_INVALID: user error with the data provided, but no
>   instability is expected and no recovery actions are needed.
> * FW_UPLOAD_ERR_BUSY: upload attempted at a bad time either due to
>   overload or the machine is in the wrong platform state.
> 
> synthetic_restore_required:
> Instead of tracking the status of whether an individual GCTX is safe for
> use in a firmware command, force all following commands to fail with an
> error that is indicative of needing a firmware rollback.
> 
> To test:
> 1. Build the kernel enabling SEV-SNP as normal and add CONFIG_FW_UPLOAD=y.
> 2. Add the following to your kernel_cmdline: ccp.psp_init_on_probe=0.
> 3.Get an AMD SEV-SNP firmware sbin appropriate to your Epyc chip model at
> https://www.amd.com/en/developer/sev.html and extract to get a .sbin
> file.
> 4. Run the following with your sbinfile in FW:
> 
> echo 1 > /sys/class/firmware/snp_dlfw_ex/loading
> cat "${FW?}" > /sys/class/firmware/snp_dlfw_ex/data
> echo 0 > /sys/class/firmware/snp_dlfw_ex/loading
> 
> 5. Verify the firmware update message in dmesg.
> 
> Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> Tested-by: Ashish Kalra <ashish.kalra@amd.com>
> Signed-off-by: Dionna Glaze <dionnaglaze@google.com>
> ---
>  drivers/crypto/ccp/Kconfig   |   2 +
>  drivers/crypto/ccp/sev-dev.c |  12 +-
>  drivers/crypto/ccp/sev-dev.h |  16 +++
>  drivers/crypto/ccp/sev-fw.c  | 213 +++++++++++++++++++++++++++++++++++
>  4 files changed, 241 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
> index f394e45e11ab4..520b1c84d11f4 100644
> --- a/drivers/crypto/ccp/Kconfig
> +++ b/drivers/crypto/ccp/Kconfig
> @@ -39,6 +39,8 @@ config CRYPTO_DEV_SP_PSP
>  	bool "Platform Security Processor (PSP) device"
>  	default y
>  	depends on CRYPTO_DEV_CCP_DD && X86_64 && AMD_IOMMU
> +	select FW_LOADER
> +	select FW_UPLOAD
>  	help
>  	 Provide support for the AMD Platform Security Processor (PSP).
>  	 The PSP is a dedicated processor that provides support for key
> diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
> index 32f7b6147905e..8c73f023b6420 100644
> --- a/drivers/crypto/ccp/sev-dev.c
> +++ b/drivers/crypto/ccp/sev-dev.c
> @@ -485,7 +485,7 @@ void snp_free_firmware_page(void *addr)
>  }
>  EXPORT_SYMBOL_GPL(snp_free_firmware_page);
>  
> -static void *sev_fw_alloc(unsigned long len)
> +void *sev_fw_alloc(unsigned long len)
>  {
>  	struct page *page;
>  
> @@ -853,6 +853,10 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
>  	if (WARN_ON_ONCE(!data != !buf_len))
>  		return -EINVAL;
>  
> +	ret = sev_snp_synthetic_error(sev, psp_ret);

A comment similar to what you have in the commit log would be nice above
this call.

> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * Copy the incoming data to driver's scratch buffer as __pa() will not
>  	 * work for some memory, e.g. vmalloc'd addresses, and @data may not be
> @@ -1523,7 +1527,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len)
>  }
>  EXPORT_SYMBOL_GPL(psp_copy_user_blob);
>  
> -static int sev_get_api_version(void)
> +int sev_get_api_version(void)
>  {
>  	struct sev_device *sev = psp_master->sev_data;
>  	struct sev_user_data_status status;
> @@ -2320,6 +2324,8 @@ int sev_dev_init(struct psp_device *psp)
>  	if (ret)
>  		goto e_irq;
>  
> +	sev_snp_dev_init_firmware_upload(sev);
> +
>  	dev_notice(dev, "sev enabled\n");
>  
>  	return 0;
> @@ -2398,6 +2404,8 @@ void sev_dev_destroy(struct psp_device *psp)
>  		kref_put(&misc_dev->refcount, sev_exit);
>  
>  	psp_clear_sev_irq_handler(psp);
> +
> +	sev_snp_dev_init_firmware_upload(sev);

Shouldn't this be the destroy call?

>  }
>  
>  static int snp_shutdown_on_panic(struct notifier_block *nb,
> diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
> index 28add34484ed1..52a423e7df84f 100644
> --- a/drivers/crypto/ccp/sev-dev.h
> +++ b/drivers/crypto/ccp/sev-dev.h
> @@ -59,7 +59,13 @@ struct sev_device {
>  	bool snp_initialized;
>  
>  #ifdef CONFIG_FW_UPLOAD
> +	/* Lock to protect fw_cancel */
> +	struct mutex fw_lock;
> +	struct fw_upload *fwl;
> +	bool fw_cancel;
> +
>  	u32 last_snp_asid;
> +	bool synthetic_restore_required;
>  	u64 *snp_asid_to_gctx_pages_map;
>  	u64 *snp_unbound_gctx_pages;
>  	u32 snp_unbound_gctx_end;
> @@ -72,12 +78,22 @@ void sev_dev_destroy(struct psp_device *psp);
>  void sev_pci_init(void);
>  void sev_pci_exit(void);
>  
> +void *sev_fw_alloc(unsigned long len);
> +int sev_get_api_version(void);
> +int sev_snp_download_firmware_ex(struct sev_device *sev, const u8 *data, u32 size, int *error);
> +
>  #ifdef CONFIG_FW_UPLOAD
>  void snp_cmd_bookkeeping_locked(int cmd, struct sev_device *sev, void *data);
>  int sev_snp_platform_init_firmware_upload(struct sev_device *sev);
> +void sev_snp_dev_init_firmware_upload(struct sev_device *sev);
> +void sev_snp_destroy_firmware_upload(struct sev_device *sev);
> +int sev_snp_synthetic_error(struct sev_device *sev, int *psp_ret);
>  #else
>  static inline void snp_cmd_bookkeeping_locked(int cmd, struct sev_device *sev, void *data) { }
>  static inline int sev_snp_platform_init_firmware_upload(struct sev_device *sev) { return 0; }
> +static inline void sev_snp_dev_init_firmware_upload(struct sev_device *sev) { }
> +static inline void sev_snp_destroy_firmware_upload(struct sev_device *sev) { }
> +static inline int sev_snp_synthetic_error(struct sev_device *sev, int *psp_ret) { return 0; }
>  #endif /* CONFIG_FW_UPLOAD */
>  
>  #endif /* __SEV_DEV_H */
> diff --git a/drivers/crypto/ccp/sev-fw.c b/drivers/crypto/ccp/sev-fw.c
> index 55a5a572da8f1..97ce90f2af29a 100644
> --- a/drivers/crypto/ccp/sev-fw.c
> +++ b/drivers/crypto/ccp/sev-fw.c
> @@ -4,6 +4,7 @@
>   */
>  
>  #include <linux/firmware.h>
> +#include <linux/psp.h>
>  #include <linux/psp-sev.h>
>  
>  #include <asm/sev.h>
> @@ -115,3 +116,215 @@ int sev_snp_platform_init_firmware_upload(struct sev_device *sev)
>  	}
>  	return 0;
>  }
> +
> +static enum fw_upload_err snp_dlfw_ex_prepare(struct fw_upload *fw_upload,
> +					      const u8 *data, u32 size)
> +{
> +	return FW_UPLOAD_ERR_NONE;
> +}
> +
> +static enum fw_upload_err snp_dlfw_ex_poll_complete(struct fw_upload *fw_upload)
> +{
> +	return FW_UPLOAD_ERR_NONE;
> +}
> +
> +/*
> + * This may be called asynchronously with an on-going update.  All other
> + * functions are called sequentially in a single thread. To avoid contention on
> + * register accesses, only update the cancel_request flag. Other functions will
> + * check this flag and handle the cancel request synchronously.
> + */
> +static void snp_dlfw_ex_cancel(struct fw_upload *fw_upload)
> +{
> +	struct sev_device *sev = fw_upload->dd_handle;
> +
> +	mutex_lock(&sev->fw_lock);
> +	sev->fw_cancel = true;
> +	mutex_unlock(&sev->fw_lock);

Does this really need a lock? Also, I don't see it being set to false
ever, so the first time an update is canceled it can never be updated again.

> +}
> +
> +static enum fw_upload_err snp_dlfw_ex_err_translate(struct sev_device *sev, int psp_ret)
> +{
> +	dev_dbg(sev->dev, "Failed to update SEV firmware: %#x\n", psp_ret);

Blank line.

> +	/*
> +	 * Operation error:
> +	 *   HW_ERROR: Critical error. Machine needs repairs now.
> +	 *   RW_ERROR: Severe error. Roll back to the prior version to recover.
> +	 * User error:
> +	 *   FW_INVALID: Bad input for this interface.
> +	 *   BUSY: Wrong machine state to run download_firmware_ex.
> +	 */
> +	switch (psp_ret) {
> +	case SEV_RET_RESTORE_REQUIRED:
> +		dev_warn(sev->dev, "Firmware updated but unusable\n");
> +		dev_warn(sev->dev, "Need to do manual firmware rollback!!!\n");

Please keep to a single message so that other messages don't get in
between when outputted.

> +		return FW_UPLOAD_ERR_RW_ERROR;
> +	case SEV_RET_SHUTDOWN_REQUIRED:
> +		/* No state changes made. Not a hardware error. */
> +		dev_warn(sev->dev, "Firmware image cannot be live updated\n");
> +		return FW_UPLOAD_ERR_FW_INVALID;
> +	case SEV_RET_BAD_VERSION:
> +		/* No state changes made. Not a hardware error. */
> +		dev_warn(sev->dev, "Firmware image is not well formed\n");
> +		return FW_UPLOAD_ERR_FW_INVALID;
> +		/* SEV-specific errors that can still happen. */
> +	case SEV_RET_BAD_SIGNATURE:
> +		/* No state changes made. Not a hardware error. */
> +		dev_warn(sev->dev, "Firmware image signature is bad\n");
> +		return FW_UPLOAD_ERR_FW_INVALID;
> +	case SEV_RET_INVALID_PLATFORM_STATE:
> +		/* Calling at the wrong time. Not a hardware error. */
> +		dev_warn(sev->dev, "Firmware not updated as SEV in INIT state\n");
> +		return FW_UPLOAD_ERR_BUSY;
> +	case SEV_RET_HWSEV_RET_UNSAFE:
> +		dev_err(sev->dev, "Firmware is unstable. Reset your machine!!!\n");
> +		return FW_UPLOAD_ERR_HW_ERROR;
> +		/* Kernel bug cases. */
> +	case SEV_RET_INVALID_PARAM:
> +		dev_err(sev->dev, "Download-firmware-EX invalid parameter\n");
> +		return FW_UPLOAD_ERR_RW_ERROR;
> +	case SEV_RET_INVALID_ADDRESS:
> +		dev_err(sev->dev, "Download-firmware-EX invalid address\n");
> +		return FW_UPLOAD_ERR_RW_ERROR;
> +	default:
> +		dev_err(sev->dev, "Unhandled download_firmware_ex err %d\n", psp_ret);
> +		return FW_UPLOAD_ERR_HW_ERROR;
> +	}
> +}
> +
> +static enum fw_upload_err snp_update_guest_statuses(struct sev_device *sev)

snp_update_guest_contexts

> +{
> +	struct sev_data_snp_guest_status status_data;
> +	void *snp_guest_status;
> +	enum fw_upload_err ret;
> +	int error;
> +
> +	/*
> +	 * Force an update of guest context pages after SEV firmware
> +	 * live update by issuing SNP_GUEST_STATUS on all guest
> +	 * context pages.
> +	 */
> +	snp_guest_status = sev_fw_alloc(PAGE_SIZE);
> +	if (!snp_guest_status)
> +		return FW_UPLOAD_ERR_INVALID_SIZE;
> +
> +	/*
> +	 * After the last bound asid-to-gctx page is snp_unbound_gctx_end-many
> +	 * unbound gctx pages that also need updating.
> +	 */
> +	for (int i = 1; i <= sev->last_snp_asid + sev->snp_unbound_gctx_end; i++) {
> +		if (sev->snp_asid_to_gctx_pages_map[i]) {

Let reduce some indentation and do

		if (!sev->snp_asid_to_gctx_pages_map[i])
			continue;

> +			status_data.gctx_paddr = sev->snp_asid_to_gctx_pages_map[i];
> +			status_data.address = __psp_pa(snp_guest_status);
> +			ret = sev_do_cmd(SEV_CMD_SNP_GUEST_STATUS, &status_data, &error);

... and
		if (!ret)
			continue;

> +			if (ret) {
> +				/*
> +				 * Handle race with SNP VM being destroyed/decommissoned,
> +				 * if guest context page invalid error is returned,
> +				 * assume guest has been destroyed.
> +				 */
> +				if (error == SEV_RET_INVALID_GUEST)
> +					continue;

Add a blank line here.

And add a comment here about synthetic_restore_required being set.

> +				sev->synthetic_restore_required = true;
> +				dev_err(sev->dev, "SNP GCTX update error: %#x\n", error);
> +				dev_err(sev->dev, "Roll back SNP firmware!\n");

Single message, something like:

"SNP guest context update error %#x, recommend SNP firmware roll back."

> +				snp_free_firmware_page(snp_guest_status);

This is done in the goto below, so must be removed.

> +				ret = FW_UPLOAD_ERR_RW_ERROR;
> +				goto fw_err;
> +			}
> +		}
> +	}
> +fw_err:
> +	snp_free_firmware_page(snp_guest_status);
> +	return ret;
> +}
> +
> +static enum fw_upload_err snp_dlfw_ex_write(struct fw_upload *fwl, const u8 *data,
> +					    u32 offset, u32 size, u32 *written)
> +{
> +	struct sev_device *sev = fwl->dd_handle;
> +	u8 api_major, api_minor, build;
> +	int ret, error;
> +	bool cancel;
> +
> +	if (!sev)
> +		return FW_UPLOAD_ERR_HW_ERROR;
> +
> +	mutex_lock(&sev->fw_lock);
> +	cancel = sev->fw_cancel;
> +	mutex_unlock(&sev->fw_lock);

Same question on the mutex, I don't see how this helps with any timing.

> +
> +	if (cancel)
> +		return FW_UPLOAD_ERR_CANCELED;
> +
> +	/*
> +	 * SEV firmware update is a one-shot update operation, the write()
> +	 * callback to be invoked multiple times for the same update is
> +	 * unexpected.
> +	 */
> +	if (offset)
> +		return FW_UPLOAD_ERR_INVALID_SIZE;
> +
> +	if (sev_get_api_version())
> +		return FW_UPLOAD_ERR_HW_ERROR;
> +
> +	api_major = sev->api_major;
> +	api_minor = sev->api_minor;
> +	build     = sev->build;
> +
> +	ret = sev_snp_download_firmware_ex(sev, data, size, &error);
> +	if (ret)
> +		return snp_dlfw_ex_err_translate(sev, error);
> +
> +	ret = snp_update_guest_statuses(sev);
> +	if (ret)
> +		return ret;
> +
> +	sev_get_api_version();
> +	if (api_major != sev->api_major || api_minor != sev->api_minor ||
> +	    build != sev->build) {
> +		dev_info(sev->dev, "SEV firmware updated from %d.%d.%d to %d.%d.%d\n",
> +			 api_major, api_minor, build,
> +			 sev->api_major, sev->api_minor, sev->build);
> +	} else {
> +		dev_info(sev->dev, "SEV firmware same as old %d.%d.%d\n",
> +			 api_major, api_minor, build);

Maybe something like:
"SEV firmware not updated, same as current version %d.%d.%d"

> +	}
> +
> +	*written = size;

Blank line.

> +	return FW_UPLOAD_ERR_NONE;
> +}
> +
> +static const struct fw_upload_ops snp_dlfw_ex_ops = {
> +	.prepare = snp_dlfw_ex_prepare,
> +	.write = snp_dlfw_ex_write,
> +	.poll_complete = snp_dlfw_ex_poll_complete,
> +	.cancel = snp_dlfw_ex_cancel,
> +};
> +
> +void sev_snp_dev_init_firmware_upload(struct sev_device *sev)

snp_init_firmware_upload

> +{
> +	struct fw_upload *fwl;
> +
> +	fwl = firmware_upload_register(THIS_MODULE, sev->dev, "snp_dlfw_ex", &snp_dlfw_ex_ops, sev);
> +

Remove blank line.

> +	if (IS_ERR(fwl))
> +		dev_err(sev->dev, "SEV firmware upload initialization error %ld\n", PTR_ERR(fwl));
> +
> +	sev->fwl = fwl;
> +	mutex_init(&sev->fw_lock);

Probably can't happen, but shouldn't the mutex be initialized before
calling firmware_upload_register()?

> +}
> +
> +void sev_snp_destroy_firmware_upload(struct sev_device *sev)

snp_destroy_firmware_upload

> +{
> +	firmware_upload_unregister(sev->fwl);

Should this check that the sev->fwl is valid before trying to call
firmware_upload_unregister()?

> +}
> +
> +int sev_snp_synthetic_error(struct sev_device *sev, int *psp_ret)
> +{
> +	if (sev->synthetic_restore_required) {
> +		*psp_ret = SEV_RET_RESTORE_REQUIRED;
> +		return -EIO;
> +	}

Blank line.

Thanks,
Tom

> +	return 0;
> +}