Previously, the functions for sending microcode data and retrieving the
next offset were placeholders, as they required handling the specific
mailbox format. Implement them as following:
== Mailbox Format ==
The staging mailbox consists of two primary sections: 'header' and
'data'. While the microcode must be transferred following this format,
the actual data transfer mechanism involves reading and writing to
specific MMIO registers.
== Mailbox Data Registers ==
Unlike conventional interfaces that allocate MMIO space for each data
chunk, the staging interface features a "narrow" interface, using only
two dword-sized registers for read and write operations.
For example, if writing 2 dwords of data to a device. Typically, the
device would expose 2 dwords of "wide" MMIO space. To send the data to
the device:
writel(buf[0], io_addr + 0);
writel(buf[1], io_addr + 1);
But, this interface is a bit different. Instead of having a "wide"
interface where there is separate MMIO space for each word in a
transaction, it has a "narrow" interface where several words are written
to the same spot in MMIO space:
writel(buf[0], io_addr);
writel(buf[1], io_addr);
The same goes for the read side.
== Implementation Summary ==
Given that, introduce two layers of helper functions at first:
* Low-level helpers for reading and writing to data registers directly.
* Wrapper functions for handling mailbox header and data sections.
Using them, implement send_data_chunk() and fetch_next_offset()
functions. Add explicit error and timeout handling routine in
wait_for_transaction(), finishing up the transfer.
Note: The kernel has support for similar mailboxes. But none of them are
compatible with this one. Trying to share code resulted in a bloated
mess, so this code is standalone.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
V2 -> V3:
* Update code to reflect the removal of a global variable (Dave).
V1 -> V2:
* Add lots of code comments and edit the changelog (Dave).
* Encapsulate register read/write operations for processing header and
data sections.
---
arch/x86/kernel/cpu/microcode/intel.c | 150 ++++++++++++++++++++++++--
1 file changed, 144 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 12910b8f9f8a..542b3abad8e3 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
+#include <linux/pci_ids.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
@@ -42,8 +43,30 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
#define MBOX_CONTROL_OFFSET 0x0
#define MBOX_STATUS_OFFSET 0x4
+#define MBOX_WRDATA_OFFSET 0x8
+#define MBOX_RDDATA_OFFSET 0xc
#define MASK_MBOX_CTRL_ABORT BIT(0)
+#define MASK_MBOX_CTRL_GO BIT(31)
+
+#define MASK_MBOX_STATUS_ERROR BIT(2)
+#define MASK_MBOX_STATUS_READY BIT(31)
+
+#define MASK_MBOX_RESP_SUCCESS BIT(0)
+#define MASK_MBOX_RESP_PROGRESS BIT(1)
+#define MASK_MBOX_RESP_ERROR BIT(2)
+
+#define MBOX_CMD_LOAD 0x3
+#define MBOX_OBJ_STAGING 0xb
+#define DWORD_ALIGN(size) ((size) / sizeof(u32))
+#define MBOX_HEADER(mbox_size) (PCI_VENDOR_ID_INTEL | \
+ (MBOX_OBJ_STAGING << 16) | \
+ ((u64)DWORD_ALIGN(mbox_size) << 32))
+
+/* The size of each mailbox header */
+#define MBOX_HEADER_SIZE sizeof(u64)
+/* The size of staging hardware response */
+#define MBOX_RESPONSE_SIZE sizeof(u64)
/*
* Each microcode image is divided into chunks, each at most
@@ -57,6 +80,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
*/
#define MBOX_XACTION_SIZE PAGE_SIZE
#define MBOX_XACTION_MAX(imgsz) ((imgsz) * 2)
+#define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC)
/* Current microcode patch used in early patching on the APs. */
static struct microcode_intel *ucode_patch_va __read_mostly;
@@ -345,6 +369,49 @@ static __init struct microcode_intel *scan_microcode(void *data, size_t size,
return size ? NULL : patch;
}
+static inline u32 read_mbox_dword(void __iomem *mmio_base)
+{
+ u32 dword = readl(mmio_base + MBOX_RDDATA_OFFSET);
+
+ /* Acknowledge read completion to the staging hardware */
+ writel(0, mmio_base + MBOX_RDDATA_OFFSET);
+ return dword;
+}
+
+static inline void write_mbox_dword(void __iomem *mmio_base, u32 dword)
+{
+ writel(dword, mmio_base + MBOX_WRDATA_OFFSET);
+}
+
+static inline u64 read_mbox_header(void __iomem *mmio_base)
+{
+ u32 high, low;
+
+ low = read_mbox_dword(mmio_base);
+ high = read_mbox_dword(mmio_base);
+
+ return ((u64)high << 32) | low;
+}
+
+static inline void write_mbox_header(void __iomem *mmio_base, u64 value)
+{
+ write_mbox_dword(mmio_base, value);
+ write_mbox_dword(mmio_base, value >> 32);
+}
+
+static void write_mbox_data(void __iomem *mmio_base, u32 *chunk, unsigned int chunk_size)
+{
+ int i;
+
+ /*
+ * The MMIO space is mapped as Uncached (UC). Each write arrives
+ * at the device as an individual transaction in program order.
+ * The device can then resemble the sequence accordingly.
+ */
+ for (i = 0; i < DWORD_ALIGN(chunk_size); i++)
+ write_mbox_dword(mmio_base, chunk[i]);
+}
+
/*
* Prepare for a new microcode transfer by resetting hardware and
* configuring microcode image info.
@@ -388,15 +455,71 @@ static bool can_send_next_chunk(struct staging_state *ss)
return true;
}
+/*
+ * Wait for the hardware to complete a transaction.
+ * Return true on success, false on failure.
+ */
+static bool wait_for_transaction(struct staging_state *ss)
+{
+ u32 timeout, status;
+
+ /* Allow time for hardware to complete the operation: */
+ for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
+ msleep(1);
+
+ status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
+ /* Break out early if the hardware is ready: */
+ if (status & MASK_MBOX_STATUS_READY)
+ break;
+ }
+
+ status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
+
+ /* Check for explicit error response */
+ if (status & MASK_MBOX_STATUS_ERROR) {
+ ss->state = UCODE_ERROR;
+ return false;
+ }
+
+ /*
+ * Hardware is neither responded to the action nor
+ * signaled any error. Treat the case as timeout.
+ */
+ if (!(status & MASK_MBOX_STATUS_READY)) {
+ ss->state = UCODE_TIMEOUT;
+ return false;
+ }
+
+ ss->state = UCODE_OK;
+ return true;
+}
+
/*
* Transmit a chunk of the microcode image to the hardware.
* Return true if the chunk is processed successfully.
*/
static bool send_data_chunk(struct staging_state *ss)
{
- pr_debug_once("Staging mailbox loading code needs to be implemented.\n");
- ss->state = UCODE_ERROR;
- return false;
+ u16 mbox_size = MBOX_HEADER_SIZE * 2 + ss->chunk_size;
+ u32 *chunk = ss->ucode_ptr + ss->offset;
+
+ /*
+ * Write 'request' mailbox object in the following order:
+ * - Mailbox header includes total size
+ * - Command header specifies the load operation
+ * - Data section contains a microcode chunk
+ */
+ write_mbox_header(ss->mmio_base, MBOX_HEADER(mbox_size));
+ write_mbox_header(ss->mmio_base, MBOX_CMD_LOAD);
+ write_mbox_data(ss->mmio_base, chunk, ss->chunk_size);
+ ss->bytes_sent += ss->chunk_size;
+
+ /*
+ * Notify the hardware that the mailbox is ready for processing.
+ * The staging hardware will process the request asynchronously.
+ */
+ writel(MASK_MBOX_CTRL_GO, ss->mmio_base + MBOX_CONTROL_OFFSET);
+ return wait_for_transaction(ss);
}
/*
@@ -405,9 +528,24 @@ static bool send_data_chunk(struct staging_state *ss)
*/
static bool fetch_next_offset(struct staging_state *ss)
{
- pr_debug_once("Staging mailbox response handling code needs to be implemented.\n\n");
- ss->state = UCODE_ERROR;
- return false;
+ const u16 mbox_size = MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE;
+
+ /* All responses begin with the same header value: */
+ WARN_ON_ONCE(read_mbox_header(ss->mmio_base) != MBOX_HEADER(mbox_size));
+
+ /*
+ * The 'response' mailbox contains two dword data:
+ * - First has next offset in microcode image
+ * - Second delivers status flag
+ */
+ ss->offset = read_mbox_dword(ss->mmio_base);
+ if (read_mbox_dword(ss->mmio_base) & MASK_MBOX_RESP_ERROR) {
+ ss->state = UCODE_ERROR;
+ return false;
+ }
+
+ ss->state = UCODE_OK;
+ return true;
}
/*
--
2.45.2
>+/*
>+ * Wait for the hardware to complete a transaction.
>+ * Return true on success, false on failure.
>+ */
>+static bool wait_for_transaction(struct staging_state *ss)
>+{
>+ u32 timeout, status;
>+
>+ /* Allow time for hardware to complete the operation: */
>+ for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
>+ msleep(1);
>+
>+ status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
>+ /* Break out early if the hardware is ready: */
>+ if (status & MASK_MBOX_STATUS_READY)
>+ break;
Shouldn't we exit the loop if the MASK_MBOX_STATUS_ERROR is set, or is the
ERROR bit always set in conjunction with the READY bit?
>+ }
>+
>+ status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
I still think this read is not needed.
>+
>+ /* Check for explicit error response */
>+ if (status & MASK_MBOX_STATUS_ERROR) {
>+ ss->state = UCODE_ERROR;
>+ return false;
>+ }
>+
>+ /*
>+ * Hardware is neither responded to the action nor
>+ * signaled any error. Treat the case as timeout.
>+ */
>+ if (!(status & MASK_MBOX_STATUS_READY)) {
>+ ss->state = UCODE_TIMEOUT;
>+ return false;
>+ }
>+
>+ ss->state = UCODE_OK;
>+ return true;
>+}
On 4/16/2025 7:14 AM, Chao Gao wrote:
>> +/*
>> + * Wait for the hardware to complete a transaction.
>> + * Return true on success, false on failure.
>> + */
>> +static bool wait_for_transaction(struct staging_state *ss)
>> +{
>> + u32 timeout, status;
>> +
>> + /* Allow time for hardware to complete the operation: */
>> + for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
>> + msleep(1);
>> +
>> + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
>> + /* Break out early if the hardware is ready: */
>> + if (status & MASK_MBOX_STATUS_READY)
>> + break;
>
> Shouldn't we exit the loop if the MASK_MBOX_STATUS_ERROR is set, or is the
> ERROR bit always set in conjunction with the READY bit?
>
>> + }
>> +
>> + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
>
> I still think this read is not needed.
No, if it times out, it could exit without having read the status.
But this is a slow path — instead of trying to save a few cycles, I
wanted to present the logic more clearly:
* Give the hardware enough time, but not forever.
* Oh, we don't need to wait anymore if it's done
* After waiting, check the status and handle it properly.
Isn’t it clearer to read the status after the wait, right before the
error handling?
Also, reading the status inside the loop is just a way to determine
whether to break — not to interpret or act on it. That should come after
the wait completes.
On 4/16/25 10:22, Chang S. Bae wrote:
> On 4/16/2025 7:14 AM, Chao Gao wrote:
>>> +/*
>>> + * Wait for the hardware to complete a transaction.
>>> + * Return true on success, false on failure.
>>> + */
>>> +static bool wait_for_transaction(struct staging_state *ss)
>>> +{
>>> + u32 timeout, status;
>>> +
>>> + /* Allow time for hardware to complete the operation: */
>>> + for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) {
>>> + msleep(1);
>>> +
>>> + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
>>> + /* Break out early if the hardware is ready: */
>>> + if (status & MASK_MBOX_STATUS_READY)
>>> + break;
>>
>> Shouldn't we exit the loop if the MASK_MBOX_STATUS_ERROR is set, or is
>> the
>> ERROR bit always set in conjunction with the READY bit?
>>
>>> + }
>>> +
>>> + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
>>
>> I still think this read is not needed.
>
> No, if it times out, it could exit without having read the status.
Can it? There are only two ways out of the loop: the explicit break and
the implicit break from the 'timeout' check. Both happen at the "end" of
the loop, unless the timeout< check failed up front:
for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++){
msleep(1);
status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
if (status & MASK_MBOX_STATUS_READY)
break;
}
> But this is a slow path — instead of trying to save a few cycles, I
> wanted to present the logic more clearly:
>
> * Give the hardware enough time, but not forever.
> * Oh, we don't need to wait anymore if it's done
>
> * After waiting, check the status and handle it properly.
>
> Isn’t it clearer to read the status after the wait, right before the
> error handling?
It's simpler to read the code the way you have it. You don't have to
consider if 'status' could be uninitialized. It makes the "timeout loop"
*COMPLETELY* independent from the rest of the function. It could be:
foo()
{
spin_until_ready();
status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
...
}
for example.
It's less likely to break, too. Let's say someone added a new break:
for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++){
+ if (some_other_condition());
+ break;
msleep(1);
status = readl(ss->mmio_base + MBOX_STATUS_OFFSET);
if (status & MASK_MBOX_STATUS_READY)
break;
}
you'd have a bug on your hands.
I'm fine with the way you have it. I probably would have written it that
way too.
© 2016 - 2025 Red Hat, Inc.