GHCI spec for TDX 1.0 says that the MapGPA call may fail with the R10
error code = TDG.VP.VMCALL_RETRY (1), and the guest must retry this
operation for the pages in the region starting at the GPA specified
in R11.
When a TDX guest runs on Hyper-V, Hyper-V returns the retry error
when hyperv_init() -> swiotlb_update_mem_attributes() ->
set_memory_decrypted() decrypts up to 1GB of swiotlb bounce buffers.
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
Changes in v2:
Used __tdx_hypercall() directly in tdx_map_gpa().
Added a max_retry_cnt of 1000.
Renamed a few variables, e.g., r11 -> map_fail_paddr.
Changes in v3:
Changed max_retry_cnt from 1000 to 3.
Changes in v4:
__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT) -> __tdx_hypercall_ret()
Added Kirill's Acked-by.
Changes in v5:
Added Michael's Reviewed-by.
Changes in v6: None.
arch/x86/coco/tdx/tdx.c | 64 +++++++++++++++++++++++++++++++++--------
1 file changed, 52 insertions(+), 12 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 4c4c6db39eca..5574c91541a2 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -28,6 +28,8 @@
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+#define TDVMCALL_STATUS_RETRY 1
+
/* MMIO direction */
#define EPT_READ 0
#define EPT_WRITE 1
@@ -788,14 +790,15 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
}
/*
- * Inform the VMM of the guest's intent for this physical page: shared with
- * the VMM or private to the guest. The VMM is expected to change its mapping
- * of the page in response.
+ * Notify the VMM about page mapping conversion. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface (GHCI),
+ * section "TDG.VP.VMCALL<MapGPA>".
*/
-static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
{
- phys_addr_t start = __pa(vaddr);
- phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+ int max_retry_cnt = 3, retry_cnt = 0;
+ struct tdx_hypercall_args args;
+ u64 map_fail_paddr, ret;
if (!enc) {
/* Set the shared (decrypted) bits: */
@@ -803,12 +806,49 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
end |= cc_mkdec(0);
}
- /*
- * Notify the VMM about page mapping conversion. More info about ABI
- * can be found in TDX Guest-Host-Communication Interface (GHCI),
- * section "TDG.VP.VMCALL<MapGPA>"
- */
- if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+ while (1) {
+ memset(&args, 0, sizeof(args));
+ args.r10 = TDX_HYPERCALL_STANDARD;
+ args.r11 = TDVMCALL_MAP_GPA;
+ args.r12 = start;
+ args.r13 = end - start;
+
+ ret = __tdx_hypercall_ret(&args);
+ if (ret != TDVMCALL_STATUS_RETRY)
+ break;
+ /*
+ * The guest must retry the operation for the pages in the
+ * region starting at the GPA specified in R11. Make sure R11
+ * contains a sane value.
+ */
+ map_fail_paddr = args.r11;
+ if (map_fail_paddr < start || map_fail_paddr >= end)
+ return false;
+
+ if (map_fail_paddr == start) {
+ retry_cnt++;
+ if (retry_cnt > max_retry_cnt)
+ return false;
+ } else {
+ retry_cnt = 0;
+ start = map_fail_paddr;
+ }
+ }
+
+ return !ret;
+}
+
+/*
+ * Inform the VMM of the guest's intent for this physical page: shared with
+ * the VMM or private to the guest. The VMM is expected to change its mapping
+ * of the page in response.
+ */
+static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+{
+ phys_addr_t start = __pa(vaddr);
+ phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+
+ if (!tdx_map_gpa(start, end, enc))
return false;
/* private->shared conversion requires only MapGPA call */
--
2.25.1
On 5/4/23 15:53, Dexuan Cui wrote:
> - if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> + while (1) {
> + memset(&args, 0, sizeof(args));
> + args.r10 = TDX_HYPERCALL_STANDARD;
> + args.r11 = TDVMCALL_MAP_GPA;
> + args.r12 = start;
> + args.r13 = end - start;
> +
> + ret = __tdx_hypercall_ret(&args);
> + if (ret != TDVMCALL_STATUS_RETRY)
> + break;
> + /*
> + * The guest must retry the operation for the pages in the
> + * region starting at the GPA specified in R11. Make sure R11
> + * contains a sane value.
> + */
> + map_fail_paddr = args.r11;
> + if (map_fail_paddr < start || map_fail_paddr >= end)
> + return false;
This should probably also say: "r11" comes from the untrusted VMM.
Sanity check it.
Should this *really* be "map_fail_paddr >= end"? Or is
"map_fail_paddr > end" sufficient? In other words, is it really worth
failing this if a VMM said to retry a 0-byte region at the end?
> + if (map_fail_paddr == start) {
> + retry_cnt++;
> + if (retry_cnt > max_retry_cnt)
I think we can spare two bytes in a few spots to make these 'count'
instead of 'cnt'.
> + return false;
> + } else {
> + retry_cnt = 0;
> + start = map_fail_paddr;
> + }
> + }
this fails the "normal operation should be at the lowest indentation"
rule. How about this:
while (retry_count < max_retries) {
...
/* "Consume" a retry without forward progress: */
if (map_fail_paddr == start) {
retry_count++;
continue;
}
start = map_fail_paddr;
retry_count = 0;
}
// plus maybe a wee bit different 'ret' processing
'max_retries' also ends up being a misnomer. You can have as many
retries as there are pages plus 'max_retries'. It's really "maximum
allowed consecutive failures". Maybe it should be "max_retries_per_page".
> From: Dave Hansen <dave.hansen@intel.com>
> Sent: Tuesday, May 23, 2023 2:13 PM
> ...
> On 5/4/23 15:53, Dexuan Cui wrote:
> > - if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> > + while (1) {
> > + memset(&args, 0, sizeof(args));
> > + args.r10 = TDX_HYPERCALL_STANDARD;
> > + args.r11 = TDVMCALL_MAP_GPA;
> > + args.r12 = start;
> > + args.r13 = end - start;
> > +
> > + ret = __tdx_hypercall_ret(&args);
> > + if (ret != TDVMCALL_STATUS_RETRY)
> > + break;
> > + /*
> > + * The guest must retry the operation for the pages in the
> > + * region starting at the GPA specified in R11. Make sure R11
> > + * contains a sane value.
> > + */
> > + map_fail_paddr = args.r11;
> > + if (map_fail_paddr < start || map_fail_paddr >= end)
> > + return false;
>
> This should probably also say: "r11" comes from the untrusted VMM.
> Sanity check it.
Thanks! I'll use the wording you recommended in the next version.
> Should this *really* be "map_fail_paddr >= end"? Or is
> "map_fail_paddr > end" sufficient. In other words, is it really
> worth failing this if a VMM said to retry a 0-byte region at the end?
According to the GHCI spec, R13 "must be a multiple of 4KB". My
understanding is that R13 should not be 0, and a hypervisor is not
supposed to tell the guest to retry a 0-byte region at the end.
IMHO it should be a hypervisor bug if a hypervisor returns
TDVMCALL_STATUS_RETRY and the returned 'map_fail_paddr' equals
'end' (Note: the valid page range is [start, end - 1]).
Hyper-V returns "invalid parameter" if the length (i.e. args.r13) is 0,
so "retry a 0-byte region at the end" would fail anyway. I guess
other hypervisors may also return an error if the length is 0.
So I'd like to keep the comparison as-is.
> > + if (map_fail_paddr == start) {
> > + retry_cnt++;
> > + if (retry_cnt > max_retry_cnt)
>
> I think we can spare two bytes in a few spots to make these 'count'
> instead of 'cnt'.
Ok, I'll rename the variable 'max_retry_cnt' to 'max_retries_per_page',
and 'retry_cnt' to 'retry_count'.
> > + return false;
> > + } else {
> > + retry_cnt = 0;
> > + start = map_fail_paddr;
> > + }
> > + }
>
> this fails the "normal operation should be at the lowest indentation"
> rule. How about this:
>
> while (retry_count < max_retries) {
> ...
>
> /* "Consume" a retry without forward progress: */
> if (map_fail_paddr == start) {
> retry_count++;
> continue;
> }
>
> start = map_fail_paddr;
> retry_count = 0;
> }
>
> // plus maybe a wee bit different 'ret' processing
>
>
> 'max_retries' also ends up being a misnomer. You can have as many
> retries as there are pages plus 'max_retries'. It's really "maximum
> allowed consecutive failures". Maybe it should be "max_retries_per_page".
Thanks, I'll rename 'max_retries' to 'max_retries_per_page'.
I'll use the below in the next version.
I added "const" to "int max_retries_per_page".
Please let me know if I missed anything.
+static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
+{
+ const int max_retries_per_page = 3;
+ struct tdx_hypercall_args args;
+ u64 map_fail_paddr, ret;
+ int retry_count = 0;
+
+ if (!enc) {
+ /* Set the shared (decrypted) bits: */
+ start |= cc_mkdec(0);
+ end |= cc_mkdec(0);
+ }
+
+ while (retry_count < max_retries_per_page) {
+ memset(&args, 0, sizeof(args));
+ args.r10 = TDX_HYPERCALL_STANDARD;
+ args.r11 = TDVMCALL_MAP_GPA;
+ args.r12 = start;
+ args.r13 = end - start;
+
+ ret = __tdx_hypercall_ret(&args);
+ if (ret != TDVMCALL_STATUS_RETRY)
+ return !ret;
+ /*
+ * The guest must retry the operation for the pages in the
+ * region starting at the GPA specified in R11. R11 comes
+ * from the untrusted VMM. Sanity check it.
+ */
+ map_fail_paddr = args.r11;
+ if (map_fail_paddr < start || map_fail_paddr >= end)
+ return false;
+
+ /* "Consume" a retry without forward progress */
+ if (map_fail_paddr == start) {
+ retry_count++;
+ continue;
+ }
+
+ start = map_fail_paddr;
+ retry_count = 0;
+ }
+
+ return false;
+}
© 2016 - 2026 Red Hat, Inc.