Implement the iommu_pri_request_page IOMMU operation
and the handling of received page group response descriptors
Signed-off-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
---
hw/i386/intel_iommu.c | 235 +++++++++++++++++++++++++++++++++
hw/i386/intel_iommu_internal.h | 2 +
2 files changed, 237 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 899655928d..dcc92aae58 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -51,6 +51,8 @@
((ce)->val[1] & VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK)
#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+ ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
@@ -1922,6 +1924,7 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_PASID_ENTRY_P] = true,
[VTD_FR_PASID_TABLE_ENTRY_INV] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
+ [VTD_FR_SM_PRE_ABS] = true,
[VTD_FR_FS_NON_CANONICAL] = true,
[VTD_FR_FS_BIT_UPDATE_FAILED] = true,
[VTD_FR_MAX] = false,
@@ -4379,6 +4382,45 @@ static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
return true;
}
+/*
+ * Process a Page Group Response Descriptor.
+ *
+ * Forward the guest's response for a previously reported page request to
+ * the endpoint identified by (RID, PASID).
+ *
+ * Returns false on a malformed descriptor or an unsupported (PASID-less)
+ * response; returns true otherwise, including when no matching address
+ * space exists (the response is then silently dropped).
+ */
+static bool vtd_process_page_group_response_desc(IntelIOMMUState *s,
+                                                 VTDInvDesc *inv_desc)
+{
+    VTDAddressSpace *vtd_dev_as;
+    bool pasid_present;
+    uint8_t response_code;
+    uint16_t rid;
+    uint32_t pasid;
+    uint16_t prgi;
+
+    if ((inv_desc->lo & VTD_INV_DESC_PGRESP_RSVD_LO) ||
+        (inv_desc->hi & VTD_INV_DESC_PGRESP_RSVD_HI)) {
+        error_report_once("%s: invalid page group response desc: hi=%"PRIx64
+                          ", lo=%"PRIx64" (reserved nonzero)", __func__,
+                          inv_desc->hi, inv_desc->lo);
+        return false;
+    }
+
+    pasid_present = VTD_INV_DESC_PGRESP_PP(inv_desc->lo);
+    response_code = VTD_INV_DESC_PGRESP_RC(inv_desc->lo);
+    rid = VTD_INV_DESC_PGRESP_RID(inv_desc->lo);
+    pasid = VTD_INV_DESC_PGRESP_PASID(inv_desc->lo);
+    prgi = VTD_INV_DESC_PGRESP_PRGI(inv_desc->hi);
+
+    if (!pasid_present) {
+        error_report_once("Page group response without PASID is "
+                          "not supported yet");
+        return false;
+    }
+
+    vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, rid, pasid);
+    if (!vtd_dev_as) {
+        /* Unknown (RID, PASID): drop the response without raising a fault */
+        return true;
+    }
+
+    IOMMUPRIResponse response = {
+        .prgi = prgi,
+        .response_code = response_code
+    };
+    memory_region_notify_pri_iommu(&vtd_dev_as->iommu, &response);
+
+    return true;
+}
+
static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
@@ -4486,6 +4528,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_PGRESP:
+ trace_vtd_inv_desc("page group response", inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_page_group_response_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
default:
error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
" (unknown type)", __func__, inv_desc.hi,
@@ -6138,6 +6187,191 @@ static uint64_t vtd_get_min_page_size(IOMMUMemoryRegion *iommu)
return VTD_PAGE_SIZE;
}
+/* 11.4.11.3 : The number of entries in the page request queue is 2^(PQS + 7) */
+static inline uint64_t vtd_prq_size(IntelIOMMUState *s)
+{
+    /* PQS is extracted from DMAR_PQA_REG via the VTD_PQA_SIZE mask */
+    return 1ULL << ((vtd_get_quad(s, DMAR_PQA_REG) & VTD_PQA_SIZE) + 7);
+}
+
+/**
+ * Check the Page Request Enable (PRE) bit of the scalable-mode context
+ * entry mapping this address space.
+ *
+ * Return true if the bit is accessible and correctly set, false otherwise.
+ * On failure, a fault is reported via vtd_report_fault() (honoring the
+ * Fault Processing Disable bit when it can be retrieved).
+ */
+static bool vtd_check_pre_bit(VTDAddressSpace *vtd_as, hwaddr addr,
+                              uint16_t sid, bool is_write)
+{
+    int ret;
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    uint8_t bus_n = pci_bus_num(vtd_as->bus);
+    VTDContextEntry ce;
+    bool is_fpd_set = false;
+
+    ret = vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce);
+
+    if (ret) {
+        goto error_report;
+    }
+
+    if (!VTD_CE_GET_PRE(&ce)) {
+        ret = -VTD_FR_SM_PRE_ABS;
+        goto error_get_fpd_and_report;
+    }
+
+    return true;
+
+error_get_fpd_and_report:
+    /* Try to get fpd (may not work but we are already on an error path) */
+    is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+    vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+error_report:
+    vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
+                     vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
+    return false;
+}
+
+/*
+ * Generate a Page Request Event if permitted.
+ * Logic described in section 7.5.
+ */
+static void vtd_generate_page_request_event(IntelIOMMUState *s,
+                                            uint32_t old_pr_status)
+{
+    uint32_t current_pectl = vtd_get_long(s, DMAR_PECTL_REG);
+    /*
+     * Hardware evaluates PPR and PRO fields in the Page Request Status Register
+     * and if any of them is set, Page Request Event is not generated
+     */
+    if (old_pr_status & (VTD_PR_STATUS_PRO | VTD_PR_STATUS_PPR)) {
+        return;
+    }
+
+    /*
+     * Mark the event interrupt as pending; when interrupts are not masked
+     * (IM clear), clear IP again and deliver the interrupt immediately.
+     */
+    vtd_set_clear_mask_long(s, DMAR_PECTL_REG, 0, VTD_PR_PECTL_IP);
+    if (!(current_pectl & VTD_PR_PECTL_IM)) {
+        vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+        vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+    }
+}
+
+/*
+ * Perform the implicit IOTLB invalidation that accompanies a page request.
+ * Only first-level translation (FLT) is handled here; any other translation
+ * type fails with -EINVAL.
+ *
+ * When calling this function, we know that we are in scalable mode.
+ */
+static int vtd_pri_perform_implicit_invalidation(VTDAddressSpace *vtd_as,
+                                                 hwaddr addr)
+{
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    VTDContextEntry ce;
+    VTDPASIDEntry pe;
+    uint16_t pgtt;
+    uint16_t domain_id;
+    int ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                       vtd_as->devfn, &ce);
+    if (ret) {
+        return -EINVAL;
+    }
+    ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe, vtd_as->pasid);
+    if (ret) {
+        return -EINVAL;
+    }
+    pgtt = VTD_PE_GET_TYPE(&pe);
+    domain_id = VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+    ret = 0;
+    switch (pgtt) {
+    case VTD_SM_PASID_ENTRY_FLT:
+        vtd_piotlb_page_invalidate(s, domain_id, vtd_as->pasid, addr, 0, false);
+        break;
+    /* Room for other pgtt values */
+    default:
+        error_report_once("Translation type not supported yet : %d", pgtt);
+        ret = -EINVAL;
+        break;
+    }
+
+    return ret;
+}
+
+/*
+ * Page Request Descriptor : 7.4.1.1
+ *
+ * Build a page request descriptor for the faulting device, write it at the
+ * tail of the guest's Page Request Queue and, if needed, generate a Page
+ * Request Event.
+ *
+ * Returns 0 on success, -EPERM when PRI is not available for this device,
+ * -EINVAL on a malformed request, -ENOSPC when the queue is full or has
+ * already overflowed, -EIO when the descriptor cannot be written to guest
+ * memory.
+ */
+static int vtd_iommu_pri_request_page(IOMMUMemoryRegion *iommu, hwaddr addr,
+                                      bool lpig, uint16_t prgi, bool is_read,
+                                      bool is_write, bool exec_req,
+                                      bool priv_req)
+{
+    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    uint64_t queue_addr_reg = vtd_get_quad(s, DMAR_PQA_REG);
+    uint64_t queue_tail_offset_reg = vtd_get_quad(s, DMAR_PQT_REG);
+    uint64_t new_queue_tail_offset = (
+        (queue_tail_offset_reg + VTD_PQA_ENTRY_SIZE) %
+        (vtd_prq_size(s) * VTD_PQA_ENTRY_SIZE));
+    uint64_t queue_head_offset_reg = vtd_get_quad(s, DMAR_PQH_REG);
+    hwaddr queue_tail = (queue_addr_reg & VTD_PQA_ADDR) + queue_tail_offset_reg;
+    uint32_t old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+    uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+    VTDPRDesc desc;
+
+    if (!(s->ecap & VTD_ECAP_PRS)) {
+        return -EPERM;
+    }
+
+    /*
+     * No need to check if scalable mode is enabled as we already know that
+     * VTD_ECAP_PRS is set (see vtd_decide_config)
+     */
+
+    /* We do not support PRI without PASID */
+    if (vtd_as->pasid == PCI_NO_PASID) {
+        return -EPERM;
+    }
+    /* An execute request without read permission is invalid */
+    if (exec_req && !is_read) {
+        return -EINVAL;
+    }
+
+    /* Check PRE bit in the scalable mode context entry */
+    if (!vtd_check_pre_bit(vtd_as, addr, sid, is_write)) {
+        return -EPERM;
+    }
+
+    if (old_pr_status & VTD_PR_STATUS_PRO) {
+        /*
+         * No action is taken by hardware to report a fault
+         * or generate an event
+         */
+        return -ENOSPC;
+    }
+
+    /* Check for overflow */
+    if (new_queue_tail_offset == queue_head_offset_reg) {
+        vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PRO);
+        vtd_generate_page_request_event(s, old_pr_status);
+        return -ENOSPC;
+    }
+
+    if (vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
+        return -EINVAL;
+    }
+
+    desc.lo = VTD_PRD_TYPE | VTD_PRD_PP(true) | VTD_PRD_RID(sid) |
+              VTD_PRD_PASID(vtd_as->pasid) | VTD_PRD_PMR(priv_req);
+    desc.hi = VTD_PRD_RDR(is_read) | VTD_PRD_WRR(is_write) |
+              VTD_PRD_LPIG(lpig) | VTD_PRD_PRGI(prgi) | VTD_PRD_ADDR(addr);
+
+    /* Descriptors in the queue are little-endian */
+    desc.lo = cpu_to_le64(desc.lo);
+    desc.hi = cpu_to_le64(desc.hi);
+    if (dma_memory_write(&address_space_memory, queue_tail, &desc, sizeof(desc),
+                         MEMTXATTRS_UNSPECIFIED)) {
+        error_report_once("IO error, the PQ tail cannot be updated");
+        return -EIO;
+    }
+
+    /* increment the tail register and set the pending request bit */
+    vtd_set_quad(s, DMAR_PQT_REG, new_queue_tail_offset);
+    /*
+     * Read status again so that the kernel does not miss a request.
+     * In some cases, we can trigger an unnecessary interrupt but this strategy
+     * drastically improves performance as we don't need to take a lock.
+     */
+    old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+    if (!(old_pr_status & VTD_PR_STATUS_PPR)) {
+        vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PPR);
+        vtd_generate_page_request_event(s, old_pr_status);
+    }
+
+    return 0;
+}
+
static PCIIOMMUOps vtd_iommu_ops = {
.get_address_space = vtd_host_dma_iommu,
.get_address_space_pasid = vtd_host_dma_iommu_pasid,
@@ -6346,6 +6580,7 @@ static void vtd_iommu_memory_region_class_init(ObjectClass *klass,
imrc->replay = vtd_iommu_replay;
imrc->iommu_ats_request_translation = vtd_iommu_ats_request_translation;
imrc->get_min_page_size = vtd_get_min_page_size;
+ imrc->iommu_pri_request_page = vtd_iommu_pri_request_page;
}
static const TypeInfo vtd_iommu_memory_region_info = {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 9e01251335..b2314c6a95 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -319,6 +319,8 @@ typedef enum VTDFaultReason {
* request while disabled */
VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
+ VTD_FR_SM_PRE_ABS = 0x47, /* SCT.8 : PRE bit in a present SM CE is 0 */
+
VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */
/* PASID directory entry access failure */
VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
--
2.45.1
© 2016 - 2024 Red Hat, Inc.