[PATCH v1 01/17] intel_iommu: Use the latest fault reasons defined by spec

Zhenzhong Duan posted 17 patches 2 months ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Thomas Huth <thuth@redhat.com>, Laurent Vivier <lvivier@redhat.com>
There is a newer version of this series
[PATCH v1 01/17] intel_iommu: Use the latest fault reasons defined by spec
Posted by Zhenzhong Duan 2 months ago
From: Yu Zhang <yu.c.zhang@linux.intel.com>

Spec revision 3.0 or above defines more detailed fault reasons for
scalable mode. Introduce them into the emulation code; see spec
section 7.1.2 for details.

Note that the spec revision has no relation to the VERSION register, so
the guest kernel should not use that register to judge which features
are supported. Instead, the cap/ecap bits should be checked.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  9 ++++++++-
 hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99bddf..c0ca7b372f 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
                                   * request while disabled */
     VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
 
-    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
+    /* PASID directory entry access failure */
+    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
+    /* The Present(P) field of pasid directory entry is 0 */
+    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
+    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
+    /* The Present(P) field of pasid table entry is 0 */
+    VTD_FR_PASID_ENTRY_P = 0x59,
+    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
 
     /* Output address in the interrupt address range for scalable mode */
     VTD_FR_SM_INTERRUPT_ADDR = 0x87,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 37c21a0aec..e65f5b29a5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
     addr = pasid_dir_base + index * entry_size;
     if (dma_memory_read(&address_space_memory, addr,
                         pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_DIR_ACCESS_ERR;
     }
 
     pdire->val = le64_to_cpu(pdire->val);
@@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
                                           dma_addr_t addr,
                                           VTDPASIDEntry *pe)
 {
+    uint8_t pgtt;
     uint32_t index;
     dma_addr_t entry_size;
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
     addr = addr + index * entry_size;
     if (dma_memory_read(&address_space_memory, addr,
                         pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
     }
     for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
         pe->val[i] = le64_to_cpu(pe->val[i]);
@@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
 
     /* Do translation type check */
     if (!vtd_pe_type_check(x86_iommu, pe)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_TABLE_ENTRY_INV;
     }
 
-    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
-        return -VTD_FR_PASID_TABLE_INV;
+    pgtt = VTD_PE_GET_TYPE(pe);
+    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
+        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
+            return -VTD_FR_PASID_TABLE_ENTRY_INV;
     }
 
     return 0;
@@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
     }
 
     if (!vtd_pdire_present(&pdire)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_DIR_ENTRY_P;
     }
 
     ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
@@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
     }
 
     if (!vtd_pe_present(pe)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_ENTRY_P;
     }
 
     return 0;
@@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
     }
 
     if (!vtd_pdire_present(&pdire)) {
-        return -VTD_FR_PASID_TABLE_INV;
+        return -VTD_FR_PASID_DIR_ENTRY_P;
     }
 
     /*
@@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
     [VTD_FR_ROOT_ENTRY_RSVD] = false,
     [VTD_FR_PAGING_ENTRY_RSVD] = true,
     [VTD_FR_CONTEXT_ENTRY_TT] = true,
-    [VTD_FR_PASID_TABLE_INV] = false,
+    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
+    [VTD_FR_PASID_DIR_ENTRY_P] = true,
+    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
+    [VTD_FR_PASID_ENTRY_P] = true,
+    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
     [VTD_FR_SM_INTERRUPT_ADDR] = true,
     [VTD_FR_MAX] = false,
 };
-- 
2.34.1
Re: [PATCH v1 01/17] intel_iommu: Use the latest fault reasons defined by spec
Posted by Yi Liu 1 month, 2 weeks ago
On 2024/7/18 16:16, Zhenzhong Duan wrote:
> From: Yu Zhang <yu.c.zhang@linux.intel.com>
> 
> Spec revision 3.0 or above defines more detailed fault reasons for
> scalable mode. So introduce them into emulation code, see spec
> section 7.1.2 for details.
> 
> Note spec revision has no relation with VERSION register, Guest
> kernel should not use that register to judge what features are
> supported. Instead cap/ecap bits should be checked.
> 
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_internal.h |  9 ++++++++-
>   hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
>   2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f8cf99bddf..c0ca7b372f 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
>                                     * request while disabled */
>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>   
> -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
> +    /* PASID directory entry access failure */
> +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
> +    /* The Present(P) field of pasid directory entry is 0 */
> +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
> +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
> +    /* The Present(P) field of pasid table entry is 0 */
> +    VTD_FR_PASID_ENTRY_P = 0x59,
> +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
>   
>       /* Output address in the interrupt address range for scalable mode */
>       VTD_FR_SM_INTERRUPT_ADDR = 0x87,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 37c21a0aec..e65f5b29a5 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
>       addr = pasid_dir_base + index * entry_size;
>       if (dma_memory_read(&address_space_memory, addr,
>                           pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
>       }
>   
>       pdire->val = le64_to_cpu(pdire->val);
> @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>                                             dma_addr_t addr,
>                                             VTDPASIDEntry *pe)
>   {
> +    uint8_t pgtt;
>       uint32_t index;
>       dma_addr_t entry_size;
>       X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>       addr = addr + index * entry_size;
>       if (dma_memory_read(&address_space_memory, addr,
>                           pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
>       }
>       for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
>           pe->val[i] = le64_to_cpu(pe->val[i]);
> @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>   
>       /* Do translation type check */
>       if (!vtd_pe_type_check(x86_iommu, pe)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
>       }
>   
> -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +    pgtt = VTD_PE_GET_TYPE(pe);
> +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
> +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
>       }
>   
>       return 0;
> @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>       }
>   
>       if (!vtd_pdire_present(&pdire)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>       }
>   
>       ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
> @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>       }
>   
>       if (!vtd_pe_present(pe)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_ENTRY_P;
>       }
>   
>       return 0;
> @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
>       }
>   
>       if (!vtd_pdire_present(&pdire)) {
> -        return -VTD_FR_PASID_TABLE_INV;
> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>       }
>   
>       /*
> @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
>       [VTD_FR_ROOT_ENTRY_RSVD] = false,
>       [VTD_FR_PAGING_ENTRY_RSVD] = true,
>       [VTD_FR_CONTEXT_ENTRY_TT] = true,
> -    [VTD_FR_PASID_TABLE_INV] = false,
> +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
> +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
> +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
> +    [VTD_FR_PASID_ENTRY_P] = true,
> +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>       [VTD_FR_SM_INTERRUPT_ADDR] = true,
>       [VTD_FR_MAX] = false,
>   };

@Jason, @Michael,

Do you know the rule for setting this table? I noticed it has been there
since day 1 [1]. I didn't find any historical discussion of it on lore, so
I'm not quite sure about its purpose. Judging by how this table is used, it
acts as a filter when the iommu driver has set the FPD bit: if FPD is set,
some errors need not trigger a trace, which is mostly for debugging purposes.

I noticed Peter had asked about it as well [2], but I don't think it was
clearly answered. Could we have a clarification here? BTW, I didn't see any
definition of it in the VT-d spec, so it should just be a software trick. :)

[1] https://lore.kernel.org/qemu-devel/1408168544-28605-3-git-send-email-tamlokveer@gmail.com/
[2] https://lore.kernel.org/qemu-devel/20190301065219.GA22229@xz-x1/

-- 
Regards,
Yi Liu
Re: [PATCH v1 01/17] intel_iommu: Use the latest fault reasons defined by spec
Posted by Michael S. Tsirkin 1 month, 2 weeks ago
On Mon, Jul 29, 2024 at 03:39:03PM +0800, Yi Liu wrote:
> On 2024/7/18 16:16, Zhenzhong Duan wrote:
> > From: Yu Zhang <yu.c.zhang@linux.intel.com>
> > 
> > Spec revision 3.0 or above defines more detailed fault reasons for
> > scalable mode. So introduce them into emulation code, see spec
> > section 7.1.2 for details.
> > 
> > Note spec revision has no relation with VERSION register, Guest
> > kernel should not use that register to judge what features are
> > supported. Instead cap/ecap bits should be checked.
> > 
> > Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> > ---
> >   hw/i386/intel_iommu_internal.h |  9 ++++++++-
> >   hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
> >   2 files changed, 24 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> > index f8cf99bddf..c0ca7b372f 100644
> > --- a/hw/i386/intel_iommu_internal.h
> > +++ b/hw/i386/intel_iommu_internal.h
> > @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
> >                                     * request while disabled */
> >       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
> > -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
> > +    /* PASID directory entry access failure */
> > +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
> > +    /* The Present(P) field of pasid directory entry is 0 */
> > +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
> > +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
> > +    /* The Present(P) field of pasid table entry is 0 */
> > +    VTD_FR_PASID_ENTRY_P = 0x59,
> > +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
> >       /* Output address in the interrupt address range for scalable mode */
> >       VTD_FR_SM_INTERRUPT_ADDR = 0x87,
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 37c21a0aec..e65f5b29a5 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
> >       addr = pasid_dir_base + index * entry_size;
> >       if (dma_memory_read(&address_space_memory, addr,
> >                           pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
> >       }
> >       pdire->val = le64_to_cpu(pdire->val);
> > @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >                                             dma_addr_t addr,
> >                                             VTDPASIDEntry *pe)
> >   {
> > +    uint8_t pgtt;
> >       uint32_t index;
> >       dma_addr_t entry_size;
> >       X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
> > @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >       addr = addr + index * entry_size;
> >       if (dma_memory_read(&address_space_memory, addr,
> >                           pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
> >       }
> >       for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
> >           pe->val[i] = le64_to_cpu(pe->val[i]);
> > @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
> >       /* Do translation type check */
> >       if (!vtd_pe_type_check(x86_iommu, pe)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
> >       }
> > -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +    pgtt = VTD_PE_GET_TYPE(pe);
> > +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
> > +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
> > +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
> >       }
> >       return 0;
> > @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
> >       }
> >       if (!vtd_pdire_present(&pdire)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_DIR_ENTRY_P;
> >       }
> >       ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
> > @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
> >       }
> >       if (!vtd_pe_present(pe)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_ENTRY_P;
> >       }
> >       return 0;
> > @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
> >       }
> >       if (!vtd_pdire_present(&pdire)) {
> > -        return -VTD_FR_PASID_TABLE_INV;
> > +        return -VTD_FR_PASID_DIR_ENTRY_P;
> >       }
> >       /*
> > @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
> >       [VTD_FR_ROOT_ENTRY_RSVD] = false,
> >       [VTD_FR_PAGING_ENTRY_RSVD] = true,
> >       [VTD_FR_CONTEXT_ENTRY_TT] = true,
> > -    [VTD_FR_PASID_TABLE_INV] = false,
> > +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
> > +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
> > +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
> > +    [VTD_FR_PASID_ENTRY_P] = true,
> > +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
> >       [VTD_FR_SM_INTERRUPT_ADDR] = true,
> >       [VTD_FR_MAX] = false,
> >   };
> 
> @Jason, @Michael,
> 
> Do you know the rule of setting this table? I noticed it was introduced
> since day-1[1]. I didn't see any history discussion on it on lore. So not
> quite sure about the purpose of it. Per the usage of this table, it is used
> as a filter when the iommu driver has set the FPD bit. If FPD is set, some
> errors need not to trigger a trace which is mostly for debug purpose.
> 
> I noticed Peter had asked it as well[2]. But I don't think it was clarified
> clearly. May we have a clarification for it here? BTW. I didn't see VT-d
> spec has any definition on it. So it should just be a software trick. :)
> 
> [1] https://lore.kernel.org/qemu-devel/1408168544-28605-3-git-send-email-tamlokveer@gmail.com/
> [2] https://lore.kernel.org/qemu-devel/20190301065219.GA22229@xz-x1/
> 
> -- 
> Regards,
> Yi Liu

Are you asking for a definition of qualified fault conditions?


7.1.1 Non-Recoverable Address Translation Faults
Non-recoverable address translation faults can be detected by remapping hardware for many different
kinds of requests as shown by Table 26. A non-recoverable fault condition is considered “qualified” if
software can suppress reporting of the fault by setting one of the Fault Processing Disable (FPD) bits
available in one or more of the address translation structures (i.e., the context-entry, scalable-mode
context-entry, scalable-mode PASID-directory entry, scalable-mode PASID-table entry). For a request
that encounters a “qualified” non-recoverable fault condition, if the remapping hardware encountered
any translation structure entry with an FPD field value of 1, the remapping hardware must not report
the fault to software. For example, when processing a request that encounters an FPD field with a value
of 1 in the scalable-mode context-entry and encounters any “qualified” fault such as SCT.*, SPD.*, SPT.*,
SFS.*, SSS.*, or SGN.*, the remapping hardware will not report the fault to software. Memory requests
that result in non-recoverable address translation faults are blocked by hardware.


-- 
MST


Re: [PATCH v1 01/17] intel_iommu: Use the latest fault reasons defined by spec
Posted by Yi Liu 1 month, 2 weeks ago

On 2024/7/29 16:42, Michael S. Tsirkin wrote:
> On Mon, Jul 29, 2024 at 03:39:03PM +0800, Yi Liu wrote:
>> On 2024/7/18 16:16, Zhenzhong Duan wrote:
>>> From: Yu Zhang <yu.c.zhang@linux.intel.com>
>>>
>>> Spec revision 3.0 or above defines more detailed fault reasons for
>>> scalable mode. So introduce them into emulation code, see spec
>>> section 7.1.2 for details.
>>>
>>> Note spec revision has no relation with VERSION register, Guest
>>> kernel should not use that register to judge what features are
>>> supported. Instead cap/ecap bits should be checked.
>>>
>>> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    hw/i386/intel_iommu_internal.h |  9 ++++++++-
>>>    hw/i386/intel_iommu.c          | 25 ++++++++++++++++---------
>>>    2 files changed, 24 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>> index f8cf99bddf..c0ca7b372f 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -311,7 +311,14 @@ typedef enum VTDFaultReason {
>>>                                      * request while disabled */
>>>        VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>> -    VTD_FR_PASID_TABLE_INV = 0x58,  /*Invalid PASID table entry */
>>> +    /* PASID directory entry access failure */
>>> +    VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>> +    /* The Present(P) field of pasid directory entry is 0 */
>>> +    VTD_FR_PASID_DIR_ENTRY_P = 0x51,
>>> +    VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */
>>> +    /* The Present(P) field of pasid table entry is 0 */
>>> +    VTD_FR_PASID_ENTRY_P = 0x59,
>>> +    VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b,  /*Invalid PASID table entry */
>>>        /* Output address in the interrupt address range for scalable mode */
>>>        VTD_FR_SM_INTERRUPT_ADDR = 0x87,
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 37c21a0aec..e65f5b29a5 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
>>>        addr = pasid_dir_base + index * entry_size;
>>>        if (dma_memory_read(&address_space_memory, addr,
>>>                            pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_DIR_ACCESS_ERR;
>>>        }
>>>        pdire->val = le64_to_cpu(pdire->val);
>>> @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>                                              dma_addr_t addr,
>>>                                              VTDPASIDEntry *pe)
>>>    {
>>> +    uint8_t pgtt;
>>>        uint32_t index;
>>>        dma_addr_t entry_size;
>>>        X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
>>> @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>        addr = addr + index * entry_size;
>>>        if (dma_memory_read(&address_space_memory, addr,
>>>                            pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_TABLE_ACCESS_ERR;
>>>        }
>>>        for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
>>>            pe->val[i] = le64_to_cpu(pe->val[i]);
>>> @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
>>>        /* Do translation type check */
>>>        if (!vtd_pe_type_check(x86_iommu, pe)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_TABLE_ENTRY_INV;
>>>        }
>>> -    if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +    pgtt = VTD_PE_GET_TYPE(pe);
>>> +    if (pgtt == VTD_SM_PASID_ENTRY_SLT &&
>>> +        !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
>>> +            return -VTD_FR_PASID_TABLE_ENTRY_INV;
>>>        }
>>>        return 0;
>>> @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>>>        }
>>>        if (!vtd_pdire_present(&pdire)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>>>        }
>>>        ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
>>> @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>>>        }
>>>        if (!vtd_pe_present(pe)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_ENTRY_P;
>>>        }
>>>        return 0;
>>> @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
>>>        }
>>>        if (!vtd_pdire_present(&pdire)) {
>>> -        return -VTD_FR_PASID_TABLE_INV;
>>> +        return -VTD_FR_PASID_DIR_ENTRY_P;
>>>        }
>>>        /*
>>> @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = {
>>>        [VTD_FR_ROOT_ENTRY_RSVD] = false,
>>>        [VTD_FR_PAGING_ENTRY_RSVD] = true,
>>>        [VTD_FR_CONTEXT_ENTRY_TT] = true,
>>> -    [VTD_FR_PASID_TABLE_INV] = false,
>>> +    [VTD_FR_PASID_DIR_ACCESS_ERR] = false,
>>> +    [VTD_FR_PASID_DIR_ENTRY_P] = true,
>>> +    [VTD_FR_PASID_TABLE_ACCESS_ERR] = false,
>>> +    [VTD_FR_PASID_ENTRY_P] = true,
>>> +    [VTD_FR_PASID_TABLE_ENTRY_INV] = true,
>>>        [VTD_FR_SM_INTERRUPT_ADDR] = true,
>>>        [VTD_FR_MAX] = false,
>>>    };
>>
>> @Jason, @Michael,
>>
>> Do you know the rule of setting this table? I noticed it was introduced
>> since day-1[1]. I didn't see any history discussion on it on lore. So not
>> quite sure about the purpose of it. Per the usage of this table, it is used
>> as a filter when the iommu driver has set the FPD bit. If FPD is set, some
>> errors need not to trigger a trace which is mostly for debug purpose.
>>
>> I noticed Peter had asked it as well[2]. But I don't think it was clarified
>> clearly. May we have a clarification for it here? BTW. I didn't see VT-d
>> spec has any definition on it. So it should just be a software trick. :)
>>
>> [1] https://lore.kernel.org/qemu-devel/1408168544-28605-3-git-send-email-tamlokveer@gmail.com/
>> [2] https://lore.kernel.org/qemu-devel/20190301065219.GA22229@xz-x1/
>>
>> -- 
>> Regards,
>> Yi Liu
> 
> Are you asking for a definition of qualified fault conditions?
> 
> 
> 7.1.1 Non-Recoverable Address Translation Faults
> Non-recoverable address translation faults can be detected by remapping hardware for many different
> kinds of requests as shown by Table 26. A non-recoverable fault condition is considered “qualified” if
> software can suppress reporting of the fault by setting one of the Fault Processing Disable (FPD) bits
> available in one or more of the address translation structures (i.e., the context-entry, scalable-mode
> context-entry, scalable-mode PASID-directory entry, scalable-mode PASID-table entry). For a request
> that encounters a “qualified” non-recoverable fault condition, if the remapping hardware encountered
> any translation structure entry with an FPD field value of 1, the remapping hardware must not report
> the fault to software. For example, when processing a request that encounters an FPD field with a value
> of 1 in the scalable-mode context-entry and encounters any “qualified” fault such as SCT.*, SPD.*, SPT.*,
> SFS.*, SSS.*, or SGN.*, the remapping hardware will not report the fault to software. Memory requests
> that result in non-recoverable address translation faults are blocked by hardware
> 

I see. Table 26 has the "qualified" column. It's clear to me now. Thanks. :)

-- 
Regards,
Yi Liu