[RFC PATCH] intel-iommu: Report interrupt remapping faults

David Woodhouse posted 1 patch 1 year, 1 month ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/825854379f8d3b2f6e021f31fb117daab023c8c8.camel@infradead.org
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Peter Xu <peterx@redhat.com>, Jason Wang <jasowang@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>
hw/i386/intel_iommu.c          | 115 +++++++++++++++++++++++++--------
hw/i386/intel_iommu_internal.h |   1 +
2 files changed, 89 insertions(+), 27 deletions(-)
[RFC PATCH] intel-iommu: Report interrupt remapping faults
Posted by David Woodhouse 1 year, 1 month ago
From: David Woodhouse <dwmw@amazon.co.uk>

There is more work to be done here, as pretranslations for the KVM IRQ
routing table can't fault yet; they should be handled in userspace and
the fault raised only when the IRQ actually happens (if indeed the IRTE
is still not valid at that time). But we can work on that later; we can
at least raise faults for the direct case.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---

Seemed like a good place to start.

Utterly untested yet except for building it. Do we have unit tests for
this; anything which will deliberately cause DMA faults that I can
extend to also do IR faults? Or should I resort to just hacking a Linux
kernel to do things wrong?

Also, why does the generic X86IOMMUClass->int_remap function return
VTD-specific values? Shouldn't it just return true or false, or an
actual error from the system errno space?

I also think we're allowing Compatibility Format MSIs when we shouldn't
(when GSTS_CFIS is clear); the reporting of VTD_FR_IR_REQ_COMPAT is
conspicuous in its absence. But I can fix that in a separate commit.


 hw/i386/intel_iommu.c          | 115 +++++++++++++++++++++++++--------
 hw/i386/intel_iommu_internal.h |   1 +
 2 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index faade7def8..946f6008fe 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -468,21 +468,12 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
 
 /* Must not update F field now, should be done later */
 static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
-                            uint16_t source_id, hwaddr addr,
-                            VTDFaultReason fault, bool is_write,
-                            bool is_pasid, uint32_t pasid)
+                            uint64_t hi, uint64_t lo)
 {
-    uint64_t hi = 0, lo;
     hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
 
     assert(index < DMAR_FRCD_REG_NR);
 
-    lo = VTD_FRCD_FI(addr);
-    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
-         VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
-    if (!is_write) {
-        hi |= VTD_FRCD_T;
-    }
     vtd_set_quad_raw(s, frcd_reg_addr, lo);
     vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
 
@@ -508,17 +499,11 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
 }
 
 /* Log and report an DMAR (address translation) fault to software */
-static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
-                                  hwaddr addr, VTDFaultReason fault,
-                                  bool is_write, bool is_pasid,
-                                  uint32_t pasid)
+static void vtd_report_frcd_fault(IntelIOMMUState *s, uint64_t source_id,
+                                  uint64_t hi, uint64_t lo)
 {
     uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
 
-    assert(fault < VTD_FR_MAX);
-
-    trace_vtd_dmar_fault(source_id, fault, addr, is_write);
-
     if (fsts_reg & VTD_FSTS_PFO) {
         error_report_once("New fault is not recorded due to "
                           "Primary Fault Overflow");
@@ -538,8 +523,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
         return;
     }
 
-    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
-                    is_write, is_pasid, pasid);
+    vtd_record_frcd(s, s->next_frcd_reg, hi, lo);
 
     if (fsts_reg & VTD_FSTS_PPF) {
         error_report_once("There are pending faults already, "
@@ -564,6 +548,42 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
     }
 }
 
+/* Log and report an DMAR (address translation) fault to software */
+static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
+                                  hwaddr addr, VTDFaultReason fault,
+                                  bool is_write, bool is_pasid,
+                                  uint32_t pasid)
+{
+    uint64_t hi, lo;
+
+    assert(fault < VTD_FR_MAX);
+
+    trace_vtd_dmar_fault(source_id, fault, addr, is_write);
+
+    lo = VTD_FRCD_FI(addr);
+    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
+         VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
+    if (!is_write) {
+        hi |= VTD_FRCD_T;
+    }
+
+    vtd_report_frcd_fault(s, source_id, hi, lo);
+}
+
+
+static void vtd_report_ir_fault(IntelIOMMUState *s, uint64_t source_id,
+                                VTDFaultReason fault, uint16_t index)
+{
+    uint64_t hi, lo;
+
+    lo = VTD_FRCD_IR_IDX(index);
+    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
+
+    vtd_report_frcd_fault(s, source_id, hi, lo);
+}
+
+#define log_irte_fault(f, sid, i) vtd_report_ir_fault(iommu, sid, f, i)
+
 /* Handle Invalidation Queue Errors of queued invalidation interface error
  * conditions.
  */
@@ -3300,7 +3320,8 @@ static Property vtd_properties[] = {
 
 /* Read IRTE entry with specific index */
 static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
-                        VTD_IR_TableEntry *entry, uint16_t sid)
+                        VTD_IR_TableEntry *entry, uint16_t sid,
+                        bool do_fault)
 {
     static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
         {0xffff, 0xfffb, 0xfff9, 0xfff8};
@@ -3311,6 +3332,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
     if (index >= iommu->intr_size) {
         error_report_once("%s: index too large: ind=0x%x",
                           __func__, index);
+        if (do_fault) {
+            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_INDEX_OVER, index);
+        }
         return -VTD_FR_IR_INDEX_OVER;
     }
 
@@ -3319,17 +3343,33 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                         entry, sizeof(*entry), MEMTXATTRS_UNSPECIFIED)) {
         error_report_once("%s: read failed: ind=0x%x addr=0x%" PRIx64,
                           __func__, index, addr);
+        if (do_fault) {
+            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ROOT_INVAL, index);
+        }
         return -VTD_FR_IR_ROOT_INVAL;
     }
 
     trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
                           le64_to_cpu(entry->data[0]));
 
+	/*
+	 * The remaining potential fault conditions are "qualified" by the
+	 * Fault Processing Disable bit in the IRTE. Even "not present".
+	 * So just clear the do_fault flag if PFD is set, which will
+	 * prevent faults being raised.
+	 */
+	if (entry->irte.fault_disable) {
+		do_fault = false;
+    }
+
     if (!entry->irte.present) {
         error_report_once("%s: detected non-present IRTE "
                           "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
                           __func__, index, le64_to_cpu(entry->data[1]),
                           le64_to_cpu(entry->data[0]));
+        if (do_fault) {
+            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ENTRY_P, index);
+        }
         return -VTD_FR_IR_ENTRY_P;
     }
 
@@ -3339,6 +3379,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                           "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
                           __func__, index, le64_to_cpu(entry->data[1]),
                           le64_to_cpu(entry->data[0]));
+        if (do_fault) {
+            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_IRTE_RSVD, index);
+        }
         return -VTD_FR_IR_IRTE_RSVD;
     }
 
@@ -3355,6 +3398,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                 error_report_once("%s: invalid IRTE SID "
                                   "(index=%u, sid=%u, source_id=%u)",
                                   __func__, index, sid, source_id);
+                if (do_fault) {
+                    vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
+                }
                 return -VTD_FR_IR_SID_ERR;
             }
             break;
@@ -3367,6 +3413,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                 error_report_once("%s: invalid SVT_BUS "
                                   "(index=%u, bus=%u, min=%u, max=%u)",
                                   __func__, index, bus, bus_min, bus_max);
+                if (do_fault) {
+                    vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
+                }
                 return -VTD_FR_IR_SID_ERR;
             }
             break;
@@ -3376,6 +3425,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                               "(index=%u, type=%d)", __func__,
                               index, entry->irte.sid_vtype);
             /* Take this as verification failure. */
+            if (do_fault) {
+                vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
+            }
             return -VTD_FR_IR_SID_ERR;
         }
     }
@@ -3385,12 +3437,12 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
 
 /* Fetch IRQ information of specific IR index */
 static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
-                             X86IOMMUIrq *irq, uint16_t sid)
+                             X86IOMMUIrq *irq, uint16_t sid, bool do_fault)
 {
     VTD_IR_TableEntry irte = {};
     int ret = 0;
 
-    ret = vtd_irte_get(iommu, index, &irte, sid);
+    ret = vtd_irte_get(iommu, index, &irte, sid, do_fault);
     if (ret) {
         return ret;
     }
@@ -3418,7 +3470,7 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
 static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
                                    MSIMessage *origin,
                                    MSIMessage *translated,
-                                   uint16_t sid)
+                                   uint16_t sid, bool do_fault)
 {
     int ret = 0;
     VTD_IR_MSIAddress addr;
@@ -3437,6 +3489,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
     if (origin->address & VTD_MSI_ADDR_HI_MASK) {
         error_report_once("%s: MSI address high 32 bits non-zero detected: "
                           "address=0x%" PRIx64, __func__, origin->address);
+        if (do_fault) {
+            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
+        }
         return -VTD_FR_IR_REQ_RSVD;
     }
 
@@ -3444,6 +3499,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
     if (addr.addr.__head != 0xfee) {
         error_report_once("%s: MSI address low 32 bit invalid: 0x%" PRIx32,
                           __func__, addr.data);
+        if (do_fault) {
+            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
+        }
         return -VTD_FR_IR_REQ_RSVD;
     }
 
@@ -3463,7 +3521,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
         index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
     }
 
-    ret = vtd_remap_irq_get(iommu, index, &irq, sid);
+    ret = vtd_remap_irq_get(iommu, index, &irq, sid, do_fault);
     if (ret) {
         return ret;
     }
@@ -3475,6 +3533,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
                               "(sid=%u, address=0x%" PRIx64
                               ", data=0x%" PRIx32 ")",
                               __func__, sid, origin->address, origin->data);
+            if (do_fault) {
+                vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
+            }
             return -VTD_FR_IR_REQ_RSVD;
         }
     } else {
@@ -3515,7 +3576,7 @@ static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
                          MSIMessage *dst, uint16_t sid)
 {
     return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
-                                   src, dst, sid);
+                                   src, dst, sid, false);
 }
 
 static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
@@ -3541,7 +3602,7 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
         sid = attrs.requester_id;
     }
 
-    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
+    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid, true);
     if (ret) {
         /* TODO: report error */
         /* Drop this interrupt */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f090e61e11..37db7d44df 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -268,6 +268,7 @@
 #define VTD_FRCD_FI(val)        ((val) & ~0xfffULL)
 #define VTD_FRCD_PV(val)        (((val) & 0xffffULL) << 40)
 #define VTD_FRCD_PP(val)        (((val) & 0x1) << 31)
+#define VTD_FRCD_IR_IDX(val)    (((val) & 0xffffULL) << 48)
 
 /* DMA Remapping Fault Conditions */
 typedef enum VTDFaultReason {
-- 
2.34.1


Re: [RFC PATCH] intel-iommu: Report interrupt remapping faults
Posted by Peter Xu 1 year, 1 month ago
On Fri, Mar 10, 2023 at 05:49:38PM +0000, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> There is more work to be done here, as pretranslations for the KVM IRQ
> routing table can't fault yet; they should be handled in userspace and
> the fault raised only when the IRQ actually happens (if indeed the IRTE
> is still not valid at that time). But we can work on that later; we can
> at least raise faults for the direct case.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
> 
> Seemed like a good place to start.
> 
> Utterly untested yet except for building it. Do we have unit tests for
> this; anything which will deliberately cause DMA faults that I can
> extend to also do IR faults? Or should I resort to just hacking a Linux
> kernel to do things wrong?
> 

I am not aware of anything besides the test in kvm-unit-tests..

https://gitlab.com/kvm-unit-tests/kvm-unit-tests/-/blob/master/x86/intel-iommu.c

> Also, why does the generic X86IOMMUClass->int_remap function return
> VTD-specific values? Shouldn't it just return true or false, or an
> actual error from the system errno space?

Agree, a boolean seems to be enough. Not a huge problem, I guess.

> 
> I also think we're allowing Compatibility Format MSIs when we shouldn't
> (when GSTS_CFIS is clear); the reporting of VTD_FR_IR_REQ_COMPAT is
> conspicuous in its absence. But I can fix that in a separate commit.

Yes, thanks.

> 
> 
>  hw/i386/intel_iommu.c          | 115 +++++++++++++++++++++++++--------
>  hw/i386/intel_iommu_internal.h |   1 +
>  2 files changed, 89 insertions(+), 27 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index faade7def8..946f6008fe 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -468,21 +468,12 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
>  
>  /* Must not update F field now, should be done later */
>  static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
> -                            uint16_t source_id, hwaddr addr,
> -                            VTDFaultReason fault, bool is_write,
> -                            bool is_pasid, uint32_t pasid)
> +                            uint64_t hi, uint64_t lo)
>  {
> -    uint64_t hi = 0, lo;
>      hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
>  
>      assert(index < DMAR_FRCD_REG_NR);
>  
> -    lo = VTD_FRCD_FI(addr);
> -    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
> -         VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
> -    if (!is_write) {
> -        hi |= VTD_FRCD_T;
> -    }
>      vtd_set_quad_raw(s, frcd_reg_addr, lo);
>      vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
>  
> @@ -508,17 +499,11 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
>  }
>  
>  /* Log and report an DMAR (address translation) fault to software */
> -static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
> -                                  hwaddr addr, VTDFaultReason fault,
> -                                  bool is_write, bool is_pasid,
> -                                  uint32_t pasid)
> +static void vtd_report_frcd_fault(IntelIOMMUState *s, uint64_t source_id,
> +                                  uint64_t hi, uint64_t lo)
>  {
>      uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
>  
> -    assert(fault < VTD_FR_MAX);
> -
> -    trace_vtd_dmar_fault(source_id, fault, addr, is_write);
> -
>      if (fsts_reg & VTD_FSTS_PFO) {
>          error_report_once("New fault is not recorded due to "
>                            "Primary Fault Overflow");
> @@ -538,8 +523,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>          return;
>      }
>  
> -    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
> -                    is_write, is_pasid, pasid);
> +    vtd_record_frcd(s, s->next_frcd_reg, hi, lo);
>  
>      if (fsts_reg & VTD_FSTS_PPF) {
>          error_report_once("There are pending faults already, "
> @@ -564,6 +548,42 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>      }
>  }
>  
> +/* Log and report an DMAR (address translation) fault to software */
> +static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
> +                                  hwaddr addr, VTDFaultReason fault,
> +                                  bool is_write, bool is_pasid,
> +                                  uint32_t pasid)
> +{
> +    uint64_t hi, lo;
> +
> +    assert(fault < VTD_FR_MAX);
> +
> +    trace_vtd_dmar_fault(source_id, fault, addr, is_write);
> +
> +    lo = VTD_FRCD_FI(addr);
> +    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
> +         VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
> +    if (!is_write) {
> +        hi |= VTD_FRCD_T;
> +    }
> +
> +    vtd_report_frcd_fault(s, source_id, hi, lo);
> +}
> +
> +
> +static void vtd_report_ir_fault(IntelIOMMUState *s, uint64_t source_id,
> +                                VTDFaultReason fault, uint16_t index)
> +{
> +    uint64_t hi, lo;
> +
> +    lo = VTD_FRCD_IR_IDX(index);
> +    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
> +
> +    vtd_report_frcd_fault(s, source_id, hi, lo);
> +}
> +
> +#define log_irte_fault(f, sid, i) vtd_report_ir_fault(iommu, sid, f, i)

This one seems not used.

> +
>  /* Handle Invalidation Queue Errors of queued invalidation interface error
>   * conditions.
>   */
> @@ -3300,7 +3320,8 @@ static Property vtd_properties[] = {
>  
>  /* Read IRTE entry with specific index */
>  static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
> -                        VTD_IR_TableEntry *entry, uint16_t sid)
> +                        VTD_IR_TableEntry *entry, uint16_t sid,
> +                        bool do_fault)
>  {
>      static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
>          {0xffff, 0xfffb, 0xfff9, 0xfff8};
> @@ -3311,6 +3332,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>      if (index >= iommu->intr_size) {
>          error_report_once("%s: index too large: ind=0x%x",
>                            __func__, index);
> +        if (do_fault) {
> +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_INDEX_OVER, index);
> +        }

IIUC it's only the fault reason to report, then I am thinking if it's
easier to let the caller taking care of that?

Though we'll need conditions for fault disabled, e.g....

>          return -VTD_FR_IR_INDEX_OVER;
>      }
>  
> @@ -3319,17 +3343,33 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>                          entry, sizeof(*entry), MEMTXATTRS_UNSPECIFIED)) {
>          error_report_once("%s: read failed: ind=0x%x addr=0x%" PRIx64,
>                            __func__, index, addr);
> +        if (do_fault) {
> +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ROOT_INVAL, index);
> +        }
>          return -VTD_FR_IR_ROOT_INVAL;
>      }
>  
>      trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
>                            le64_to_cpu(entry->data[0]));
>  
> +	/*
> +	 * The remaining potential fault conditions are "qualified" by the
> +	 * Fault Processing Disable bit in the IRTE. Even "not present".
> +	 * So just clear the do_fault flag if PFD is set, which will
> +	 * prevent faults being raised.
> +	 */
> +	if (entry->irte.fault_disable) {
> +		do_fault = false;
> +    }
> +
>      if (!entry->irte.present) {
>          error_report_once("%s: detected non-present IRTE "
>                            "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
>                            __func__, index, le64_to_cpu(entry->data[1]),
>                            le64_to_cpu(entry->data[0]));
> +        if (do_fault) {
> +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ENTRY_P, index);
> +        }
>          return -VTD_FR_IR_ENTRY_P;

... here IIUC we can do:

  if (entry->irte.fault_disable)
        return 0;
  else
        return -VTD_FR_IR_ENTRY_P;
        
Hence when error occurs we always keep the error report above, and let the
caller report IR fault when <0.  It seems this will at least avoid plenty
of same calls within vtd_irte_get().

I do see a few others outside vtd_irte_get().  In short, it'll be nice to
avoid calling this same pattern in multiple places somehow.

>      }
>  
> @@ -3339,6 +3379,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>                            "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
>                            __func__, index, le64_to_cpu(entry->data[1]),
>                            le64_to_cpu(entry->data[0]));
> +        if (do_fault) {
> +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_IRTE_RSVD, index);
> +        }
>          return -VTD_FR_IR_IRTE_RSVD;
>      }
>  
> @@ -3355,6 +3398,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>                  error_report_once("%s: invalid IRTE SID "
>                                    "(index=%u, sid=%u, source_id=%u)",
>                                    __func__, index, sid, source_id);
> +                if (do_fault) {
> +                    vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
> +                }
>                  return -VTD_FR_IR_SID_ERR;
>              }
>              break;
> @@ -3367,6 +3413,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>                  error_report_once("%s: invalid SVT_BUS "
>                                    "(index=%u, bus=%u, min=%u, max=%u)",
>                                    __func__, index, bus, bus_min, bus_max);
> +                if (do_fault) {
> +                    vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
> +                }
>                  return -VTD_FR_IR_SID_ERR;
>              }
>              break;
> @@ -3376,6 +3425,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>                                "(index=%u, type=%d)", __func__,
>                                index, entry->irte.sid_vtype);
>              /* Take this as verification failure. */
> +            if (do_fault) {
> +                vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index);
> +            }
>              return -VTD_FR_IR_SID_ERR;
>          }
>      }
> @@ -3385,12 +3437,12 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
>  
>  /* Fetch IRQ information of specific IR index */
>  static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
> -                             X86IOMMUIrq *irq, uint16_t sid)
> +                             X86IOMMUIrq *irq, uint16_t sid, bool do_fault)
>  {
>      VTD_IR_TableEntry irte = {};
>      int ret = 0;
>  
> -    ret = vtd_irte_get(iommu, index, &irte, sid);
> +    ret = vtd_irte_get(iommu, index, &irte, sid, do_fault);
>      if (ret) {
>          return ret;
>      }
> @@ -3418,7 +3470,7 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
>  static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
>                                     MSIMessage *origin,
>                                     MSIMessage *translated,
> -                                   uint16_t sid)
> +                                   uint16_t sid, bool do_fault)
>  {
>      int ret = 0;
>      VTD_IR_MSIAddress addr;
> @@ -3437,6 +3489,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
>      if (origin->address & VTD_MSI_ADDR_HI_MASK) {
>          error_report_once("%s: MSI address high 32 bits non-zero detected: "
>                            "address=0x%" PRIx64, __func__, origin->address);
> +        if (do_fault) {
> +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
> +        }
>          return -VTD_FR_IR_REQ_RSVD;
>      }
>  
> @@ -3444,6 +3499,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
>      if (addr.addr.__head != 0xfee) {
>          error_report_once("%s: MSI address low 32 bit invalid: 0x%" PRIx32,
>                            __func__, addr.data);
> +        if (do_fault) {
> +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
> +        }
>          return -VTD_FR_IR_REQ_RSVD;
>      }
>  
> @@ -3463,7 +3521,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
>          index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
>      }
>  
> -    ret = vtd_remap_irq_get(iommu, index, &irq, sid);
> +    ret = vtd_remap_irq_get(iommu, index, &irq, sid, do_fault);
>      if (ret) {
>          return ret;
>      }
> @@ -3475,6 +3533,9 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
>                                "(sid=%u, address=0x%" PRIx64
>                                ", data=0x%" PRIx32 ")",
>                                __func__, sid, origin->address, origin->data);
> +            if (do_fault) {
> +                vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0);
> +            }
>              return -VTD_FR_IR_REQ_RSVD;
>          }
>      } else {
> @@ -3515,7 +3576,7 @@ static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
>                           MSIMessage *dst, uint16_t sid)
>  {
>      return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
> -                                   src, dst, sid);
> +                                   src, dst, sid, false);
>  }
>  
>  static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
> @@ -3541,7 +3602,7 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
>          sid = attrs.requester_id;
>      }
>  
> -    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
> +    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid, true);
>      if (ret) {
>          /* TODO: report error */
>          /* Drop this interrupt */
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f090e61e11..37db7d44df 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -268,6 +268,7 @@
>  #define VTD_FRCD_FI(val)        ((val) & ~0xfffULL)
>  #define VTD_FRCD_PV(val)        (((val) & 0xffffULL) << 40)
>  #define VTD_FRCD_PP(val)        (((val) & 0x1) << 31)
> +#define VTD_FRCD_IR_IDX(val)    (((val) & 0xffffULL) << 48)
>  
>  /* DMA Remapping Fault Conditions */
>  typedef enum VTDFaultReason {
> -- 
> 2.34.1
> 
> 



-- 
Peter Xu
Re: [RFC PATCH] intel-iommu: Report interrupt remapping faults
Posted by David Woodhouse 1 year, 1 month ago
On Fri, 2023-03-10 at 15:57 -0500, Peter Xu wrote:
> On Fri, Mar 10, 2023 at 05:49:38PM +0000, David Woodhouse wrote:
> > From: David Woodhouse <dwmw@amazon.co.uk>
> > 
> > +}
> > +
> > +#define log_irte_fault(f, sid, i) vtd_report_ir_fault(iommu, sid, f, i)
> 
> This one seems not used.

Oops yes, that was supposed to be temporary until I did the
search/replace.

> > +
> >  /* Handle Invalidation Queue Errors of queued invalidation interface error
> >   * conditions.
> >   */
> > @@ -3300,7 +3320,8 @@ static Property vtd_properties[] = {
> >  
> >  /* Read IRTE entry with specific index */
> >  static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
> > -                        VTD_IR_TableEntry *entry, uint16_t sid)
> > +                        VTD_IR_TableEntry *entry, uint16_t sid,
> > +                        bool do_fault)
> >  {
> >      static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
> >          {0xffff, 0xfffb, 0xfff9, 0xfff8};
> > @@ -3311,6 +3332,9 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
> >      if (index >= iommu->intr_size) {
> >          error_report_once("%s: index too large: ind=0x%x",
> >                            __func__, index);
> > +        if (do_fault) {
> > +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_INDEX_OVER, index);
> > +        }
> 
> IIUC it's only the fault reason to report, then I am thinking if it's
> easier to let the caller taking care of that?
> 
> Though we'll need conditions for fault disabled, e.g....
> 
> >          return -VTD_FR_IR_INDEX_OVER;

Well, remember I want to kill that *return* of the VTD_FR_xxx error, so
although it looks like duplication now, it won't be.


> >      }
> >  
> > @@ -3319,17 +3343,33 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
> >                          entry, sizeof(*entry), MEMTXATTRS_UNSPECIFIED)) {
> >          error_report_once("%s: read failed: ind=0x%x addr=0x%" PRIx64,
> >                            __func__, index, addr);
> > +        if (do_fault) {
> > +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ROOT_INVAL, index);
> > +        }
> >          return -VTD_FR_IR_ROOT_INVAL;
> >      }
> >  
> >      trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
> >                            le64_to_cpu(entry->data[0]));
> >  
> > +       /*
> > +        * The remaining potential fault conditions are "qualified" by the
> > +        * Fault Processing Disable bit in the IRTE. Even "not present".
> > +        * So just clear the do_fault flag if PFD is set, which will
> > +        * prevent faults being raised.
> > +        */
> > +       if (entry->irte.fault_disable) {
> > +               do_fault = false;
> > +    }
> > +
> >      if (!entry->irte.present) {
> >          error_report_once("%s: detected non-present IRTE "
> >                            "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
> >                            __func__, index, le64_to_cpu(entry->data[1]),
> >                            le64_to_cpu(entry->data[0]));
> > +        if (do_fault) {
> > +            vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ENTRY_P, index);
> > +        }
> >          return -VTD_FR_IR_ENTRY_P;
> 
> ... here IIUC we can do:
> 
>   if (entry->irte.fault_disable)
>         return 0;
>   else
>         return -VTD_FR_IR_ENTRY_P;
>         
> Hence when error occurs we always keep the error report above, and let the
> caller report IR fault when <0.  It seems this will at least avoid plenty
> of same calls within vtd_irte_get().
> 
> I do see a few others outside vtd_irte_get().  In short, it'll be nice to
> avoid calling this same pattern in multiple places somehow.

I suppose we could pass the do_fault *into* vtd_report_ir_fault(). It's
a similar pattern to vtd_report_fault() which also takes an is_fpd_set
argument.

(Note: If I could tolerate just passing VTD_FRCD_IR_IDX(index) I think
I could actually just *call* the existing vtd_report_fault() without
having to touch any of the fault reporting functions at all. The bits
do all end up in the right place. It just seemed a bit icky.)

I did briefly ponder just returning the value and then letting the
caller do the report, but then we'd *also* have to make the return code
distinguish between "failed, but do not report a fault" and "succeeded,
and your translation is valid" cases. I figured I preferred it this
way.


> > --- a/hw/i386/intel_iommu_internal.h
> > +++ b/hw/i386/intel_iommu_internal.h
> > @@ -268,6 +268,7 @@
> >  #define VTD_FRCD_FI(val)        ((val) & ~0xfffULL)
> >  #define VTD_FRCD_PV(val)        (((val) & 0xffffULL) << 40)
> >  #define VTD_FRCD_PP(val)        (((val) & 0x1) << 31)
> > +#define VTD_FRCD_IR_IDX(val)    (((val) & 0xffffULL) << 48)


Think 'val' probably needs casting to a (uint64_t) before the shift there.