Previously, the functions for sending microcode data and retrieving the
next offset were placeholders, as they required handling the specific
mailbox format. Implement them as follows:
== Mailbox Format ==
The staging mailbox consists of two primary sections: 'header' and
'data'. While the microcode must be transferred following this format,
the actual data transfer mechanism involves reading and writing to
specific MMIO registers.
== Mailbox Data Registers ==
Unlike conventional interfaces that allocate MMIO space for each data
chunk, the staging interface features a "narrow" interface, using only
two dword-sized registers for read and write operations.
For example, consider writing 2 dwords of data to a device. Typically,
the device would expose 2 dwords of "wide" MMIO space. To send the data
to the device:
writel(buf[0], io_addr + 0);
writel(buf[1], io_addr + 1);
But, this interface is a bit different. Instead of having a "wide"
interface where there is separate MMIO space for each word in a
transaction, it has a "narrow" interface where several words are written
to the same spot in MMIO space:
writel(buf[0], io_addr);
writel(buf[1], io_addr);
The same goes for the read side.
== Implementation Summary ==
Given that, first introduce two layers of helper functions:
* Low-level helpers for reading and writing to data registers directly.
* Wrapper functions for handling mailbox header and data sections.
Using them, implement send_data_chunk() and fetch_next_offset()
functions. Add explicit error and timeout handling routine in
wait_for_transaction(), finishing up the transfer.
Note: The kernel has support for similar mailboxes. But none of them are
compatible with this one. Trying to share code resulted in a bloated
mess, so this code is standalone.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
V1 -> V2:
* Add lots of code comments and edit the changelog (Dave).
* Encapsulate register read/write operations for processing header and
data sections.
---
arch/x86/kernel/cpu/microcode/intel.c | 148 +++++++++++++++++++++++++-
1 file changed, 144 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 05b5b73e525a..b0d530db72dd 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
+#include <linux/pci_ids.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
@@ -42,8 +43,30 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
#define MBOX_CONTROL_OFFSET 0x0
#define MBOX_STATUS_OFFSET 0x4
+#define MBOX_WRDATA_OFFSET 0x8
+#define MBOX_RDDATA_OFFSET 0xc
#define MASK_MBOX_CTRL_ABORT BIT(0)
+#define MASK_MBOX_CTRL_GO BIT(31)
+
+#define MASK_MBOX_STATUS_ERROR BIT(2)
+#define MASK_MBOX_STATUS_READY BIT(31)
+
+#define MASK_MBOX_RESP_SUCCESS BIT(0)
+#define MASK_MBOX_RESP_PROGRESS BIT(1)
+#define MASK_MBOX_RESP_ERROR BIT(2)
+
+#define MBOX_CMD_LOAD 0x3
+#define MBOX_OBJ_STAGING 0xb
+#define DWORD_ALIGN(size) ((size) / sizeof(u32))
+#define MBOX_HEADER(mbox_size) (PCI_VENDOR_ID_INTEL | \
+ (MBOX_OBJ_STAGING << 16) | \
+ ((u64)DWORD_ALIGN(mbox_size) << 32))
+
+/* The size of each mailbox header */
+#define MBOX_HEADER_SIZE sizeof(u64)
+/* The size of staging hardware response */
+#define MBOX_RESPONSE_SIZE sizeof(u64)
/*
* Each microcode image is divided into chunks, each at most
@@ -57,6 +80,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
*/
#define MBOX_XACTION_SIZE PAGE_SIZE
#define MBOX_XACTION_MAX(imgsz) ((imgsz) * 2)
+#define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC)
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *ucode_patch_va __read_mostly;
@@ -348,6 +372,49 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+static inline u32 read_mbox_dword(void)
+{
+ u32 dword = readl(staging.mmio_base + MBOX_RDDATA_OFFSET);
+
+ /* Acknowledge read completion to the staging firmware */
+ writel(0, staging.mmio_base + MBOX_RDDATA_OFFSET);
+ return dword;
+}
+
+static inline void write_mbox_dword(u32 dword)
+{
+ writel(dword, staging.mmio_base + MBOX_WRDATA_OFFSET);
+}
+
+static inline u64 read_mbox_header(void)
+{
+ u32 high, low;
+
+ low = read_mbox_dword();
+ high = read_mbox_dword();
+
+ return ((u64)high << 32) | low;
+}
+
+static inline void write_mbox_header(u64 value)
+{
+ write_mbox_dword(value);
+ write_mbox_dword(value >> 32);
+}
+
+static inline void write_mbox_data(u32 *chunk, unsigned int chunk_size)
+{
+ int i;
+
+ /*
+ * The MMIO space is mapped as Uncached (UC). Each write arrives
+ * at the device as an individual transaction in program order.
+ * The device can then reassemble the sequence accordingly.
+ */
+ for (i = 0; i < DWORD_ALIGN(chunk_size); i++)
+ write_mbox_dword(chunk[i]);
+}
+
/*
* Prepare for a new microcode transfer by resetting both hardware and
* software states.
@@ -392,14 +459,71 @@ static bool can_send_next_chunk(void)
return true;
}
+/*
+ * Wait for the hardware to complete a transaction.
+ * Return true on success, false on failure.
+ */
+static bool wait_for_transaction(void)
+{
+ u32 timeout, status;
+
+ /* Allow time for hardware to complete the operation: */
+ for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
+ msleep(1);
+
+ status = readl(staging.mmio_base + MBOX_STATUS_OFFSET);
+ /* Break out early if the hardware is ready: */
+ if (status & MASK_MBOX_STATUS_READY)
+ break;
+ }
+
+ status = readl(staging.mmio_base + MBOX_STATUS_OFFSET);
+
+ /* Check for explicit error response */
+ if (status & MASK_MBOX_STATUS_ERROR) {
+ staging.state = UCODE_ERROR;
+ return false;
+ }
+
+ /*
+ * Hardware has neither responded to the action nor
+ * signaled any error. Treat the case as a timeout.
+ */
+ if (!(status & MASK_MBOX_STATUS_READY)) {
+ staging.state = UCODE_TIMEOUT;
+ return false;
+ }
+
+ staging.state = UCODE_OK;
+ return true;
+}
+
/*
* Transmit a chunk of the microcode image to the hardware.
* Return true if the chunk is processed successfully.
*/
static bool send_data_chunk(void)
{
- pr_debug_once("Staging mailbox loading code needs to be implemented.\n");
- return false;
+ u16 mbox_size = MBOX_HEADER_SIZE * 2 + staging.chunk_size;
+ u32 *chunk = staging.ucode_ptr + staging.offset;
+
+ /*
+ * Write 'request' mailbox object in the following order:
+ * - Mailbox header includes total size
+ * - Command header specifies the load operation
+ * - Data section contains a microcode chunk
+ */
+ write_mbox_header(MBOX_HEADER(mbox_size));
+ write_mbox_header(MBOX_CMD_LOAD);
+ write_mbox_data(chunk, staging.chunk_size);
+ staging.bytes_sent += staging.chunk_size;
+
+ /*
+ * Notify the hardware that the mailbox is ready for processing.
+ * The staging firmware will process the request asynchronously.
+ */
+ writel(MASK_MBOX_CTRL_GO, staging.mmio_base + MBOX_CONTROL_OFFSET);
+ return wait_for_transaction();
}
/*
@@ -408,8 +532,24 @@ static bool send_data_chunk(void)
*/
static bool fetch_next_offset(void)
{
- pr_debug_once("Staging mailbox response handling code needs to be implemented.\n\n");
- return false;
+ const u16 mbox_size = MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE;
+
+ /* All responses begin with the same header value: */
+ WARN_ON_ONCE(read_mbox_header() != MBOX_HEADER(mbox_size));
+
+ /*
+ * The 'response' mailbox contains two dword data:
+ * - First has next offset in microcode image
+ * - Second delivers status flag
+ */
+ staging.offset = read_mbox_dword();
+ if (read_mbox_dword() & MASK_MBOX_RESP_ERROR) {
+ staging.state = UCODE_ERROR;
+ return false;
+ }
+
+ staging.state = UCODE_OK;
+ return true;
}
/*
--
2.45.2
>+/*
>+ * Wait for the hardware to complete a transaction.
>+ * Return true on success, false on failure.
>+ */
>+static bool wait_for_transaction(void)
>+{
>+ u32 timeout, status;
>+
>+ /* Allow time for hardware to complete the operation: */
>+ for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
>+ msleep(1);
>+
>+ status = readl(staging.mmio_base + MBOX_STATUS_OFFSET);
>+ /* Break out early if the hardware is ready: */
>+ if (status & MASK_MBOX_STATUS_READY)
>+ break;
>+ }
>+
>+ status = readl(staging.mmio_base + MBOX_STATUS_OFFSET);
why read the STATUS again?
>+
>+ /* Check for explicit error response */
>+ if (status & MASK_MBOX_STATUS_ERROR) {
>+ staging.state = UCODE_ERROR;
>+ return false;
>+ }
>+
>+ /*
>+ * Hardware is neither responded to the action nor
>+ * signaled any error. Treat the case as timeout.
>+ */
>+ if (!(status & MASK_MBOX_STATUS_READY)) {
>+ staging.state = UCODE_TIMEOUT;
>+ return false;
>+ }
>+
>+ staging.state = UCODE_OK;
>+ return true;
>+}
How about:
static enum ucode_state wait_for_transaction(void)
{
u32 timeout, status;
/* Allow time for hardware to complete the operation: */
for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
msleep(1);
status = readl(staging.mmio_base + MBOX_STATUS_OFFSET);
if (status & MASK_MBOX_STATUS_READY)
return UCODE_OK;
/* Check for explicit error response */
if (status & MASK_MBOX_STATUS_ERROR)
return UCODE_ERROR;
}
/*
* Hardware is neither responded to the action nor
* signaled any error. Treat the case as timeout.
*/
return UCODE_TIMEOUT;
}
and in send_data_chunk(), do:
staging.state = wait_for_transaction();
return staging.state != UCODE_OK;
It is simpler and requires less code. Even better, send_data_chunk() can just
propagate the ucode_state to its caller.
By the way, checkpatch.pl warns that 'msleep < 20ms can sleep for up to 20ms;
see function description of msleep().' This makes me wonder how the 10ms
timeout was determined but not precisely enforced. Is it arbitrary or selected
for specific reasons?
On 3/26/25 20:32, Chao Gao wrote:
> static enum ucode_state wait_for_transaction(void)
> {
> u32 timeout, status;
>
> /* Allow time for hardware to complete the operation: */
> for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
> msleep(1);
>
> status = readl(staging.mmio_base + MBOX_STATUS_OFFSET);
>
> if (status & MASK_MBOX_STATUS_READY)
> return UCODE_OK;
>
> /* Check for explicit error response */
> if (status & MASK_MBOX_STATUS_ERROR)
> return UCODE_ERROR;
> }
>
> /*
> * Hardware is neither responded to the action nor
> * signaled any error. Treat the case as timeout.
> */
> return UCODE_TIMEOUT;
> }
The flow at minimal indent in the function should be the _main_ control
flow: the common case.
The common case here is the "return UCODE_OK". It should not be buried
in a for() loop. I kinda like what Chang had before.
On 3/26/2025 8:32 PM, Chao Gao wrote:
>
> and in send_data_chunk(), do:
>
> staging.state = wait_for_transaction();
> return staging.state != UCODE_OK;
>
> It is simpler and requires less code. Even better, send_data_chunk() can just
> propagate the ucode_state to its caller.
Thanks for the suggestion. To me, this is more of a stylistic choice.
Previously, the call site had a single line:
return wait_for_transaction();
> By the way, checkpatch.pl warns that 'msleep < 20ms can sleep for up to 20ms;
> see function description of msleep().' This makes me wonder how the 10ms
> timeout was determined but not precisely enforced. Is it arbitrary or selected
> for specific reasons?
Yes, I saw that warning. It notes that msleep(1) could sleep for longer
than 1 ms, which I thought was acceptable. The 10 ms timeout was
determined through discussions with the firmware team responsible for
staging.
Thanks,
Chang
Previously, the functions for sending microcode data and retrieving the
next offset were placeholders, as they required handling the specific
mailbox format. Implement them as follows:
== Mailbox Format ==
The staging mailbox consists of two primary sections: 'header' and
'data'. While the microcode must be transferred following this format,
the actual data transfer mechanism involves reading and writing to
specific MMIO registers.
== Mailbox Data Registers ==
Unlike conventional interfaces that allocate MMIO space for each data
chunk, the staging interface features a "narrow" interface, using only
two dword-sized registers for read and write operations.
For example, consider writing 2 dwords of data to a device. Typically,
the device would expose 2 dwords of "wide" MMIO space. To send the data
to the device:
writel(buf[0], io_addr + 0);
writel(buf[1], io_addr + 1);
But, this interface is a bit different. Instead of having a "wide"
interface where there is separate MMIO space for each word in a
transaction, it has a "narrow" interface where several words are written
to the same spot in MMIO space:
writel(buf[0], io_addr);
writel(buf[1], io_addr);
The same goes for the read side.
== Implementation Summary ==
Given that, first introduce two layers of helper functions:
* Low-level helpers for reading and writing to data registers directly.
* Wrapper functions for handling mailbox header and data sections.
Using them, implement send_data_chunk() and fetch_next_offset()
functions. Add explicit error and timeout handling routine in
wait_for_transaction(), finishing up the transfer.
Note: The kernel has support for similar mailboxes. But none of them are
compatible with this one. Trying to share code resulted in a bloated
mess, so this code is standalone.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
V2 -> V2a:
* Adjust the code to align with the global variable removal (Dave [1]).
Note: this quick revision is just intended to ensure that the feedback
has been properly addressed.
[1]: https://lore.kernel.org/lkml/b01224ee-c935-4b08-a76f-5dc49341182d@intel.com/
---
arch/x86/kernel/cpu/microcode/intel.c | 152 +++++++++++++++++++++++++-
1 file changed, 146 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 44b94d4d05f7..ca1847ce1059 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
+#include <linux/pci_ids.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
@@ -42,8 +43,30 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
#define MBOX_CONTROL_OFFSET 0x0
#define MBOX_STATUS_OFFSET 0x4
+#define MBOX_WRDATA_OFFSET 0x8
+#define MBOX_RDDATA_OFFSET 0xc
#define MASK_MBOX_CTRL_ABORT BIT(0)
+#define MASK_MBOX_CTRL_GO BIT(31)
+
+#define MASK_MBOX_STATUS_ERROR BIT(2)
+#define MASK_MBOX_STATUS_READY BIT(31)
+
+#define MASK_MBOX_RESP_SUCCESS BIT(0)
+#define MASK_MBOX_RESP_PROGRESS BIT(1)
+#define MASK_MBOX_RESP_ERROR BIT(2)
+
+#define MBOX_CMD_LOAD 0x3
+#define MBOX_OBJ_STAGING 0xb
+#define DWORD_ALIGN(size) ((size) / sizeof(u32))
+#define MBOX_HEADER(mbox_size) (PCI_VENDOR_ID_INTEL | \
+ (MBOX_OBJ_STAGING << 16) | \
+ ((u64)DWORD_ALIGN(mbox_size) << 32))
+
+/* The size of each mailbox header */
+#define MBOX_HEADER_SIZE sizeof(u64)
+/* The size of staging hardware response */
+#define MBOX_RESPONSE_SIZE sizeof(u64)
/*
* Each microcode image is divided into chunks, each at most
@@ -57,6 +80,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
*/
#define MBOX_XACTION_SIZE PAGE_SIZE
#define MBOX_XACTION_MAX(imgsz) ((imgsz) * 2)
+#define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC)
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *ucode_patch_va __read_mostly;
@@ -345,6 +369,49 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+static inline u32 read_mbox_dword(void __iomem *mmio_base)
+{
+ u32 dword = readl(mmio_base + MBOX_RDDATA_OFFSET);
+
+ /* Acknowledge read completion to the staging firmware */
+ writel(0, mmio_base + MBOX_RDDATA_OFFSET);
+ return dword;
+}
+
+static inline void write_mbox_dword(void __iomem *mmio_base, u32 dword)
+{
+ writel(dword, mmio_base + MBOX_WRDATA_OFFSET);
+}
+
+static inline u64 read_mbox_header(void __iomem *mmio_base)
+{
+ u32 high, low;
+
+ low = read_mbox_dword(mmio_base);
+ high = read_mbox_dword(mmio_base);
+
+ return ((u64)high << 32) | low;
+}
+
+static inline void write_mbox_header(void __iomem *mmio_base, u64 value)
+{
+ write_mbox_dword(mmio_base, value);
+ write_mbox_dword(mmio_base, value >> 32);
+}
+
+static void write_mbox_data(void __iomem *mmio_base, u32 *chunk, unsigned int chunk_size)
+{
+ int i;
+
+ /*
+ * The MMIO space is mapped as Uncached (UC). Each write arrives
+ * at the device as an individual transaction in program order.
+ * The device can then reassemble the sequence accordingly.
+ */
+ for (i = 0; i < DWORD_ALIGN(chunk_size); i++)
+ write_mbox_dword(mmio_base, chunk[i]);
+}
+
/*
* Prepare for a new microcode transfer by resetting hardware and
* initializing software states.
@@ -392,24 +459,97 @@ static bool can_send_next_chunk(struct staging_state *ss)
return true;
}
+/*
+ * Wait for the hardware to complete a transaction.
+ * Return true on success, false on failure.
+ */
+static bool wait_for_transaction(struct staging_state *ss)
+{
+ u32 timeout, status;
+
+ /* Allow time for hardware to complete the operation: */
+ for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
+ msleep(1);
+
+ status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
+ /* Break out early if the hardware is ready: */
+ if (status & MASK_MBOX_STATUS_READY)
+ break;
+ }
+
+ status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
+
+ /* Check for explicit error response */
+ if (status & MASK_MBOX_STATUS_ERROR) {
+ ss->state = UCODE_ERROR;
+ return false;
+ }
+
+ /*
+ * Hardware has neither responded to the action nor
+ * signaled any error. Treat the case as a timeout.
+ */
+ if (!(status & MASK_MBOX_STATUS_READY)) {
+ ss->state = UCODE_TIMEOUT;
+ return false;
+ }
+
+ ss->state = UCODE_OK;
+ return true;
+}
+
/*
* Transmit a chunk of the microcode image to the hardware.
* Return true if the chunk is processed successfully.
*/
-static bool send_data_chunk(struct staging_state *unused)
+static bool send_data_chunk(struct staging_state *ss)
{
- pr_debug_once("Staging mailbox loading code needs to be implemented.\n");
- return false;
+ u16 mbox_size = MBOX_HEADER_SIZE * 2 + ss->chunk_size;
+ u32 *chunk = ss->ucode_ptr + ss->offset;
+
+ /*
+ * Write 'request' mailbox object in the following order:
+ * - Mailbox header includes total size
+ * - Command header specifies the load operation
+ * - Data section contains a microcode chunk
+ */
+ write_mbox_header(ss->mmio_base, MBOX_HEADER(mbox_size));
+ write_mbox_header(ss->mmio_base, MBOX_CMD_LOAD);
+ write_mbox_data(ss->mmio_base, chunk, ss->chunk_size);
+ ss->bytes_sent += ss->chunk_size;
+
+ /*
+ * Notify the hardware that the mailbox is ready for processing.
+ * The staging firmware will process the request asynchronously.
+ */
+ writel(MASK_MBOX_CTRL_GO, ss->mmio_base + MBOX_CONTROL_OFFSET);
+ return wait_for_transaction(ss);
}
/*
* Retrieve the next offset from the hardware response.
* Return true if the response is valid, false otherwise.
*/
-static bool fetch_next_offset(struct staging_state *unused)
+static bool fetch_next_offset(struct staging_state *ss)
{
- pr_debug_once("Staging mailbox response handling code needs to be implemented.\n\n");
- return false;
+ const u16 mbox_size = MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE;
+
+ /* All responses begin with the same header value: */
+ WARN_ON_ONCE(read_mbox_header(ss->mmio_base) != MBOX_HEADER(mbox_size));
+
+ /*
+ * The 'response' mailbox contains two dword data:
+ * - First has next offset in microcode image
+ * - Second delivers status flag
+ */
+ ss->offset = read_mbox_dword(ss->mmio_base);
+ if (read_mbox_dword(ss->mmio_base) & MASK_MBOX_RESP_ERROR) {
+ ss->state = UCODE_ERROR;
+ return false;
+ }
+
+ ss->state = UCODE_OK;
+ return true;
}
/*
--
2.45.2
© 2016 - 2025 Red Hat, Inc.