[Xen-devel] [PATCH v4 00/12] x86: AMD x2APIC support / AMD IOMMU improvements

Jan Beulich posted 12 patches 4 years, 8 months ago
Only 0 patches received!
[Xen-devel] [PATCH v4 00/12] x86: AMD x2APIC support / AMD IOMMU improvements
Posted by Jan Beulich 4 years, 8 months ago
Despite the title this is actually all AMD IOMMU side work; all x86
side adjustments have already been carried out.

The final few patches aren't really x2APIC related, but were found
helpful in the course of the re-work done for this version.

See individual patches for changes from v3.

01: use bit field for extended feature register
02: use bit field for control register
03: use bit field for IRTE
04: pass IOMMU to {get,free,update}_intremap_entry()
05: introduce 128-bit IRTE non-guest-APIC IRTE format
06: split amd_iommu_init_one()
07: allow enabling with IRQ not yet set up
08: adjust setup of internal interrupt for x2APIC mode
09: enable x2APIC mode when available
10: correct IRTE updating
11: don't needlessly log headers when dumping IRTs
12: miscellaneous DTE handling adjustments

The full set of patches is once again attached here, due to still
unresolved email issues on my end.

Jan

AMD/IOMMU: use bit field for extended feature register

This also takes care of several of the shift values having wrongly been
specified in hexadecimal rather than decimal.

Take the opportunity and
- replace a readl() pair by a single readq(),
- add further fields.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v4: Drop stray/leftover #undef.
v3: Another attempt at deriving masks from bitfields, hopefully better
    liked by clang (mine was fine even with the v2 variant).
v2: Correct sats_sup position and name. Re-base over new earlier patch.

--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -60,49 +60,76 @@ static int __init get_iommu_capabilities
 
 void __init get_iommu_features(struct amd_iommu *iommu)
 {
-    u32 low, high;
-    int i = 0 ;
     const struct amd_iommu *first;
-    static const char *__initdata feature_str[] = {
-        "- Prefetch Pages Command", 
-        "- Peripheral Page Service Request", 
-        "- X2APIC Supported", 
-        "- NX bit Supported", 
-        "- Guest Translation", 
-        "- Reserved bit [5]",
-        "- Invalidate All Command", 
-        "- Guest APIC supported", 
-        "- Hardware Error Registers", 
-        "- Performance Counters", 
-        NULL
-    };
-
     ASSERT( iommu->mmio_base );
 
     if ( !iommu_has_cap(iommu, PCI_CAP_EFRSUP_SHIFT) )
     {
-        iommu->features = 0;
+        iommu->features.raw = 0;
         return;
     }
 
-    low = readl(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET);
-    high = readl(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET + 4);
-
-    iommu->features = ((u64)high << 32) | low;
+    iommu->features.raw =
+        readq(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET);
 
     /* Don't log the same set of features over and over. */
     first = list_first_entry(&amd_iommu_head, struct amd_iommu, list);
-    if ( iommu != first && iommu->features == first->features )
+    if ( iommu != first && iommu->features.raw == first->features.raw )
         return;
 
     printk("AMD-Vi: IOMMU Extended Features:\n");
 
-    while ( feature_str[i] )
+#define FEAT(fld, str) do {                                    \
+    if ( --((union amd_iommu_ext_features){}).flds.fld > 1 )   \
+        printk( "- " str ": %#x\n", iommu->features.flds.fld); \
+    else if ( iommu->features.flds.fld )                       \
+        printk( "- " str "\n");                                \
+} while ( false )
+
+    FEAT(pref_sup,           "Prefetch Pages Command");
+    FEAT(ppr_sup,            "Peripheral Page Service Request");
+    FEAT(xt_sup,             "x2APIC");
+    FEAT(nx_sup,             "NX bit");
+    FEAT(gappi_sup,          "Guest APIC Physical Processor Interrupt");
+    FEAT(ia_sup,             "Invalidate All Command");
+    FEAT(ga_sup,             "Guest APIC");
+    FEAT(he_sup,             "Hardware Error Registers");
+    FEAT(pc_sup,             "Performance Counters");
+    FEAT(hats,               "Host Address Translation Size");
+
+    if ( iommu->features.flds.gt_sup )
     {
-        if ( amd_iommu_has_feature(iommu, i) )
-            printk( " %s\n", feature_str[i]);
-        i++;
+        FEAT(gats,           "Guest Address Translation Size");
+        FEAT(glx_sup,        "Guest CR3 Root Table Level");
+        FEAT(pas_max,        "Maximum PASID");
     }
+
+    FEAT(smif_sup,           "SMI Filter Register");
+    FEAT(smif_rc,            "SMI Filter Register Count");
+    FEAT(gam_sup,            "Guest Virtual APIC Modes");
+    FEAT(dual_ppr_log_sup,   "Dual PPR Log");
+    FEAT(dual_event_log_sup, "Dual Event Log");
+    FEAT(sats_sup,           "Secure ATS");
+    FEAT(us_sup,             "User / Supervisor Page Protection");
+    FEAT(dev_tbl_seg_sup,    "Device Table Segmentation");
+    FEAT(ppr_early_of_sup,   "PPR Log Overflow Early Warning");
+    FEAT(ppr_auto_rsp_sup,   "PPR Automatic Response");
+    FEAT(marc_sup,           "Memory Access Routing and Control");
+    FEAT(blk_stop_mrk_sup,   "Block StopMark Message");
+    FEAT(perf_opt_sup ,      "Performance Optimization");
+    FEAT(msi_cap_mmio_sup,   "MSI Capability MMIO Access");
+    FEAT(gio_sup,            "Guest I/O Protection");
+    FEAT(ha_sup,             "Host Access");
+    FEAT(eph_sup,            "Enhanced PPR Handling");
+    FEAT(attr_fw_sup,        "Attribute Forward");
+    FEAT(hd_sup,             "Host Dirty");
+    FEAT(inv_iotlb_type_sup, "Invalidate IOTLB Type");
+    FEAT(viommu_sup,         "Virtualized IOMMU");
+    FEAT(vm_guard_io_sup,    "VMGuard I/O Support");
+    FEAT(vm_table_size,      "VM Table Size");
+    FEAT(ga_update_dis_sup,  "Guest Access Bit Update Disable");
+
+#undef FEAT
 }
 
 int __init amd_iommu_detect_one_acpi(
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -638,7 +638,7 @@ static uint64_t iommu_mmio_read64(struct
         val = reg_to_u64(iommu->reg_status);
         break;
     case IOMMU_EXT_FEATURE_MMIO_OFFSET:
-        val = reg_to_u64(iommu->reg_ext_feature);
+        val = iommu->reg_ext_feature.raw;
         break;
 
     default:
@@ -802,39 +802,26 @@ int guest_iommu_set_base(struct domain *
 /* Initialize mmio read only bits */
 static void guest_iommu_reg_init(struct guest_iommu *iommu)
 {
-    uint32_t lower, upper;
+    union amd_iommu_ext_features ef = {
+        /* Support prefetch */
+        .flds.pref_sup = 1,
+        /* Support PPR log */
+        .flds.ppr_sup = 1,
+        /* Support guest translation */
+        .flds.gt_sup = 1,
+        /* Support invalidate all command */
+        .flds.ia_sup = 1,
+        /* Host translation size has 6 levels */
+        .flds.hats = HOST_ADDRESS_SIZE_6_LEVEL,
+        /* Guest translation size has 6 levels */
+        .flds.gats = GUEST_ADDRESS_SIZE_6_LEVEL,
+        /* Single level gCR3 */
+        .flds.glx_sup = GUEST_CR3_1_LEVEL,
+        /* 9 bit PASID */
+        .flds.pas_max = PASMAX_9_bit,
+    };
 
-    lower = upper = 0;
-    /* Support prefetch */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PREFSUP_SHIFT);
-    /* Support PPR log */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PPRSUP_SHIFT);
-    /* Support guest translation */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_GTSUP_SHIFT);
-    /* Support invalidate all command */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_IASUP_SHIFT);
-
-    /* Host translation size has 6 levels */
-    set_field_in_reg_u32(HOST_ADDRESS_SIZE_6_LEVEL, lower,
-                         IOMMU_EXT_FEATURE_HATS_MASK,
-                         IOMMU_EXT_FEATURE_HATS_SHIFT,
-                         &lower);
-    /* Guest translation size has 6 levels */
-    set_field_in_reg_u32(GUEST_ADDRESS_SIZE_6_LEVEL, lower,
-                         IOMMU_EXT_FEATURE_GATS_MASK,
-                         IOMMU_EXT_FEATURE_GATS_SHIFT,
-                         &lower);
-    /* Single level gCR3 */
-    set_field_in_reg_u32(GUEST_CR3_1_LEVEL, lower,
-                         IOMMU_EXT_FEATURE_GLXSUP_MASK,
-                         IOMMU_EXT_FEATURE_GLXSUP_SHIFT, &lower);
-    /* 9 bit PASID */
-    set_field_in_reg_u32(PASMAX_9_bit, upper,
-                         IOMMU_EXT_FEATURE_PASMAX_MASK,
-                         IOMMU_EXT_FEATURE_PASMAX_SHIFT, &upper);
-
-    iommu->reg_ext_feature.lo = lower;
-    iommu->reg_ext_feature.hi = upper;
+    iommu->reg_ext_feature = ef;
 }
 
 static int guest_iommu_mmio_range(struct vcpu *v, unsigned long addr)
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -882,7 +882,7 @@ static void enable_iommu(struct amd_iomm
     register_iommu_event_log_in_mmio_space(iommu);
     register_iommu_exclusion_range(iommu);
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
+    if ( iommu->features.flds.ppr_sup )
         register_iommu_ppr_log_in_mmio_space(iommu);
 
     desc = irq_to_desc(iommu->msi.irq);
@@ -896,15 +896,15 @@ static void enable_iommu(struct amd_iomm
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
     set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
+    if ( iommu->features.flds.ppr_sup )
         set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) )
+    if ( iommu->features.flds.gt_sup )
         set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED);
 
     set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) )
+    if ( iommu->features.flds.ia_sup )
         amd_iommu_flush_all_caches(iommu);
 
     iommu->enabled = 1;
@@ -927,10 +927,10 @@ static void disable_iommu(struct amd_iom
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED);
     set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED);
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
+    if ( iommu->features.flds.ppr_sup )
         set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED);
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) )
+    if ( iommu->features.flds.gt_sup )
         set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_DISABLED);
 
     set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED);
@@ -1026,7 +1026,7 @@ static int __init amd_iommu_init_one(str
 
     get_iommu_features(iommu);
 
-    if ( iommu->features )
+    if ( iommu->features.raw )
         iommuv2_enabled = 1;
 
     if ( allocate_cmd_buffer(iommu) == NULL )
@@ -1035,9 +1035,8 @@ static int __init amd_iommu_init_one(str
     if ( allocate_event_log(iommu) == NULL )
         goto error_out;
 
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
-        if ( allocate_ppr_log(iommu) == NULL )
-            goto error_out;
+    if ( iommu->features.flds.ppr_sup && !allocate_ppr_log(iommu) )
+        goto error_out;
 
     if ( !set_iommu_interrupt_handler(iommu) )
         goto error_out;
@@ -1393,7 +1392,7 @@ void amd_iommu_resume(void)
     }
 
     /* flush all cache entries after iommu re-enabled */
-    if ( !amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) )
+    if ( !iommu->features.flds.ia_sup )
     {
         invalidate_all_devices();
         invalidate_all_domain_pages();
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -83,7 +83,7 @@ struct amd_iommu {
     iommu_cap_t cap;
 
     u8 ht_flags;
-    u64 features;
+    union amd_iommu_ext_features features;
 
     void *mmio_base;
     unsigned long mmio_base_phys;
@@ -175,7 +175,7 @@ struct guest_iommu {
     /* MMIO regs */
     struct mmio_reg         reg_ctrl;              /* MMIO offset 0018h */
     struct mmio_reg         reg_status;            /* MMIO offset 2020h */
-    struct mmio_reg         reg_ext_feature;       /* MMIO offset 0030h */
+    union amd_iommu_ext_features reg_ext_feature;  /* MMIO offset 0030h */
 
     /* guest interrupt settings */
     struct guest_iommu_msi  msi;
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -346,26 +346,57 @@ struct amd_iommu_dte {
 #define IOMMU_EXCLUSION_LIMIT_HIGH_MASK		0xFFFFFFFF
 #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT	0
 
-/* Extended Feature Register*/
+/* Extended Feature Register */
 #define IOMMU_EXT_FEATURE_MMIO_OFFSET                   0x30
-#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT                 0x0
-#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT                  0x1
-#define IOMMU_EXT_FEATURE_XTSUP_SHIFT                   0x2
-#define IOMMU_EXT_FEATURE_NXSUP_SHIFT                   0x3
-#define IOMMU_EXT_FEATURE_GTSUP_SHIFT                   0x4
-#define IOMMU_EXT_FEATURE_IASUP_SHIFT                   0x6
-#define IOMMU_EXT_FEATURE_GASUP_SHIFT                   0x7
-#define IOMMU_EXT_FEATURE_HESUP_SHIFT                   0x8
-#define IOMMU_EXT_FEATURE_PCSUP_SHIFT                   0x9
-#define IOMMU_EXT_FEATURE_HATS_SHIFT                    0x10
-#define IOMMU_EXT_FEATURE_HATS_MASK                     0x00000C00
-#define IOMMU_EXT_FEATURE_GATS_SHIFT                    0x12
-#define IOMMU_EXT_FEATURE_GATS_MASK                     0x00003000
-#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT                  0x14
-#define IOMMU_EXT_FEATURE_GLXSUP_MASK                   0x0000C000
 
-#define IOMMU_EXT_FEATURE_PASMAX_SHIFT                  0x0
-#define IOMMU_EXT_FEATURE_PASMAX_MASK                   0x0000001F
+union amd_iommu_ext_features {
+    uint64_t raw;
+    struct {
+        unsigned int pref_sup:1;
+        unsigned int ppr_sup:1;
+        unsigned int xt_sup:1;
+        unsigned int nx_sup:1;
+        unsigned int gt_sup:1;
+        unsigned int gappi_sup:1;
+        unsigned int ia_sup:1;
+        unsigned int ga_sup:1;
+        unsigned int he_sup:1;
+        unsigned int pc_sup:1;
+        unsigned int hats:2;
+        unsigned int gats:2;
+        unsigned int glx_sup:2;
+        unsigned int smif_sup:2;
+        unsigned int smif_rc:3;
+        unsigned int gam_sup:3;
+        unsigned int dual_ppr_log_sup:2;
+        unsigned int :2;
+        unsigned int dual_event_log_sup:2;
+        unsigned int :1;
+        unsigned int sats_sup:1;
+        unsigned int pas_max:5;
+        unsigned int us_sup:1;
+        unsigned int dev_tbl_seg_sup:2;
+        unsigned int ppr_early_of_sup:1;
+        unsigned int ppr_auto_rsp_sup:1;
+        unsigned int marc_sup:2;
+        unsigned int blk_stop_mrk_sup:1;
+        unsigned int perf_opt_sup:1;
+        unsigned int msi_cap_mmio_sup:1;
+        unsigned int :1;
+        unsigned int gio_sup:1;
+        unsigned int ha_sup:1;
+        unsigned int eph_sup:1;
+        unsigned int attr_fw_sup:1;
+        unsigned int hd_sup:1;
+        unsigned int :1;
+        unsigned int inv_iotlb_type_sup:1;
+        unsigned int viommu_sup:1;
+        unsigned int vm_guard_io_sup:1;
+        unsigned int vm_table_size:4;
+        unsigned int ga_update_dis_sup:1;
+        unsigned int :2;
+    } flds;
+};
 
 /* Status Register*/
 #define IOMMU_STATUS_MMIO_OFFSET		0x2020
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -218,13 +218,6 @@ static inline int iommu_has_cap(struct a
     return !!(iommu->cap.header & (1u << bit));
 }
 
-static inline int amd_iommu_has_feature(struct amd_iommu *iommu, uint32_t bit)
-{
-    if ( !iommu_has_cap(iommu, PCI_CAP_EFRSUP_SHIFT) )
-        return 0;
-    return !!(iommu->features & (1U << bit));
-}
-
 /* access tail or head pointer of ring buffer */
 static inline uint32_t iommu_get_rb_pointer(uint32_t reg)
 {
AMD/IOMMU: use bit field for control register

Also introduce a field in struct amd_iommu caching the most recently
written control register. All writes should now happen exclusively from
that cached value, such that it is guaranteed to be up to date.

Take the opportunity and add further fields. Also convert a few boolean
function parameters to bool, such that use of !! can be avoided.

Because there are now definitions beyond bit 31, writel() also gets
replaced by writeq() when updating hardware.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v3: Switch boolean bitfields to bool.
v2: Add domain_id_pne field. Mention writel() -> writeq() change.

--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -317,7 +317,7 @@ static int do_invalidate_iotlb_pages(str
 
 static int do_completion_wait(struct domain *d, cmd_entry_t *cmd)
 {
-    bool_t com_wait_int_en, com_wait_int, i, s;
+    bool com_wait_int, i, s;
     struct guest_iommu *iommu;
     unsigned long gfn;
     p2m_type_t p2mt;
@@ -354,12 +354,10 @@ static int do_completion_wait(struct dom
         unmap_domain_page(vaddr);
     }
 
-    com_wait_int_en = iommu_get_bit(iommu->reg_ctrl.lo,
-                                    IOMMU_CONTROL_COMP_WAIT_INT_SHIFT);
     com_wait_int = iommu_get_bit(iommu->reg_status.lo,
                                  IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
 
-    if ( com_wait_int_en && com_wait_int )
+    if ( iommu->reg_ctrl.com_wait_int_en && com_wait_int )
         guest_iommu_deliver_msi(d);
 
     return 0;
@@ -521,40 +519,17 @@ static void guest_iommu_process_command(
     return;
 }
 
-static int guest_iommu_write_ctrl(struct guest_iommu *iommu, uint64_t newctrl)
+static int guest_iommu_write_ctrl(struct guest_iommu *iommu, uint64_t val)
 {
-    bool_t cmd_en, event_en, iommu_en, ppr_en, ppr_log_en;
-    bool_t cmd_en_old, event_en_old, iommu_en_old;
-    bool_t cmd_run;
-
-    iommu_en = iommu_get_bit(newctrl,
-                             IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT);
-    iommu_en_old = iommu_get_bit(iommu->reg_ctrl.lo,
-                                 IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT);
-
-    cmd_en = iommu_get_bit(newctrl,
-                           IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
-    cmd_en_old = iommu_get_bit(iommu->reg_ctrl.lo,
-                               IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
-    cmd_run = iommu_get_bit(iommu->reg_status.lo,
-                            IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT);
-    event_en = iommu_get_bit(newctrl,
-                             IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
-    event_en_old = iommu_get_bit(iommu->reg_ctrl.lo,
-                                 IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
-
-    ppr_en = iommu_get_bit(newctrl,
-                           IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-    ppr_log_en = iommu_get_bit(newctrl,
-                               IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
+    union amd_iommu_control newctrl = { .raw = val };
 
-    if ( iommu_en )
+    if ( newctrl.iommu_en )
     {
         guest_iommu_enable(iommu);
         guest_iommu_enable_dev_table(iommu);
     }
 
-    if ( iommu_en && cmd_en )
+    if ( newctrl.iommu_en && newctrl.cmd_buf_en )
     {
         guest_iommu_enable_ring_buffer(iommu, &iommu->cmd_buffer,
                                        sizeof(cmd_entry_t));
@@ -562,7 +537,7 @@ static int guest_iommu_write_ctrl(struct
         tasklet_schedule(&iommu->cmd_buffer_tasklet);
     }
 
-    if ( iommu_en && event_en )
+    if ( newctrl.iommu_en && newctrl.event_log_en )
     {
         guest_iommu_enable_ring_buffer(iommu, &iommu->event_log,
                                        sizeof(event_entry_t));
@@ -570,7 +545,7 @@ static int guest_iommu_write_ctrl(struct
         guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT);
     }
 
-    if ( iommu_en && ppr_en && ppr_log_en )
+    if ( newctrl.iommu_en && newctrl.ppr_en && newctrl.ppr_log_en )
     {
         guest_iommu_enable_ring_buffer(iommu, &iommu->ppr_log,
                                        sizeof(ppr_entry_t));
@@ -578,19 +553,21 @@ static int guest_iommu_write_ctrl(struct
         guest_iommu_clear_status(iommu, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT);
     }
 
-    if ( iommu_en && cmd_en_old && !cmd_en )
+    if ( newctrl.iommu_en && iommu->reg_ctrl.cmd_buf_en &&
+         !newctrl.cmd_buf_en )
     {
         /* Disable iommu command processing */
         tasklet_kill(&iommu->cmd_buffer_tasklet);
     }
 
-    if ( event_en_old && !event_en )
+    if ( iommu->reg_ctrl.event_log_en && !newctrl.event_log_en )
         guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT);
 
-    if ( iommu_en_old && !iommu_en )
+    if ( iommu->reg_ctrl.iommu_en && !newctrl.iommu_en )
         guest_iommu_disable(iommu);
 
-    u64_to_reg(&iommu->reg_ctrl, newctrl);
+    iommu->reg_ctrl = newctrl;
+
     return 0;
 }
 
@@ -632,7 +609,7 @@ static uint64_t iommu_mmio_read64(struct
         val = reg_to_u64(iommu->ppr_log.reg_tail);
         break;
     case IOMMU_CONTROL_MMIO_OFFSET:
-        val = reg_to_u64(iommu->reg_ctrl);
+        val = iommu->reg_ctrl.raw;
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
         val = reg_to_u64(iommu->reg_status);
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -41,7 +41,7 @@ LIST_HEAD_READ_MOSTLY(amd_iommu_head);
 struct table_struct device_table;
 bool_t iommuv2_enabled;
 
-static int iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask)
+static bool iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask)
 {
     return iommu->ht_flags & mask;
 }
@@ -69,31 +69,18 @@ static void __init unmap_iommu_mmio_regi
 
 static void set_iommu_ht_flags(struct amd_iommu *iommu)
 {
-    u32 entry;
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
     /* Setup HT flags */
     if ( iommu_has_cap(iommu, PCI_CAP_HT_TUNNEL_SHIFT) )
-        iommu_has_ht_flag(iommu, ACPI_IVHD_TT_ENABLE) ?
-            iommu_set_bit(&entry, IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT) :
-            iommu_clear_bit(&entry, IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT);
-
-    iommu_has_ht_flag(iommu, ACPI_IVHD_RES_PASS_PW) ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT):
-        iommu_clear_bit(&entry, IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT);
-
-    iommu_has_ht_flag(iommu, ACPI_IVHD_ISOC) ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_ISOCHRONOUS_SHIFT):
-        iommu_clear_bit(&entry, IOMMU_CONTROL_ISOCHRONOUS_SHIFT);
-
-    iommu_has_ht_flag(iommu, ACPI_IVHD_PASS_PW) ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT):
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT);
+        iommu->ctrl.ht_tun_en = iommu_has_ht_flag(iommu, ACPI_IVHD_TT_ENABLE);
+
+    iommu->ctrl.pass_pw     = iommu_has_ht_flag(iommu, ACPI_IVHD_PASS_PW);
+    iommu->ctrl.res_pass_pw = iommu_has_ht_flag(iommu, ACPI_IVHD_RES_PASS_PW);
+    iommu->ctrl.isoc        = iommu_has_ht_flag(iommu, ACPI_IVHD_ISOC);
 
     /* Force coherent */
-    iommu_set_bit(&entry, IOMMU_CONTROL_COHERENT_SHIFT);
+    iommu->ctrl.coherent = true;
 
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 }
 
 static void register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
@@ -205,55 +192,37 @@ static void register_iommu_ppr_log_in_mm
 
 
 static void set_iommu_translation_control(struct amd_iommu *iommu,
-                                                 int enable)
+                                          bool enable)
 {
-    u32 entry;
+    iommu->ctrl.iommu_en = enable;
 
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    enable ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT) :
-        iommu_clear_bit(&entry, IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT);
-
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 }
 
 static void set_iommu_guest_translation_control(struct amd_iommu *iommu,
-                                                int enable)
+                                                bool enable)
 {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.gt_en = enable;
 
-    enable ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT) :
-        iommu_clear_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT);
-
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     if ( enable )
         AMD_IOMMU_DEBUG("Guest Translation Enabled.\n");
 }
 
 static void set_iommu_command_buffer_control(struct amd_iommu *iommu,
-                                                    int enable)
+                                             bool enable)
 {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    /*reset head and tail pointer manually before enablement */
+    /* Reset head and tail pointer manually before enablement */
     if ( enable )
     {
         writeq(0, iommu->mmio_base + IOMMU_CMD_BUFFER_HEAD_OFFSET);
         writeq(0, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
-
-        iommu_set_bit(&entry, IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
     }
-    else
-        iommu_clear_bit(&entry, IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
 
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.cmd_buf_en = enable;
+
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 }
 
 static void register_iommu_exclusion_range(struct amd_iommu *iommu)
@@ -295,57 +264,38 @@ static void register_iommu_exclusion_ran
 }
 
 static void set_iommu_event_log_control(struct amd_iommu *iommu,
-            int enable)
+                                        bool enable)
 {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    /*reset head and tail pointer manually before enablement */
+    /* Reset head and tail pointer manually before enablement */
     if ( enable )
     {
         writeq(0, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
         writeq(0, iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET);
-
-        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
-    }
-    else
-    {
-        iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
     }
 
-    iommu_clear_bit(&entry, IOMMU_CONTROL_COMP_WAIT_INT_SHIFT);
+    iommu->ctrl.event_int_en = enable;
+    iommu->ctrl.event_log_en = enable;
+    iommu->ctrl.com_wait_int_en = false;
 
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 }
 
 static void set_iommu_ppr_log_control(struct amd_iommu *iommu,
-                                      int enable)
+                                      bool enable)
 {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    /*reset head and tail pointer manually before enablement */
+    /* Reset head and tail pointer manually before enablement */
     if ( enable )
     {
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_HEAD_OFFSET);
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
-
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
-    }
-    else
-    {
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.ppr_en = enable;
+    iommu->ctrl.ppr_int_en = enable;
+    iommu->ctrl.ppr_log_en = enable;
+
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+
     if ( enable )
         AMD_IOMMU_DEBUG("PPR Log Enabled.\n");
 }
@@ -398,7 +348,7 @@ static int iommu_read_log(struct amd_iom
 /* reset event log or ppr log when overflow */
 static void iommu_reset_log(struct amd_iommu *iommu,
                             struct ring_buffer *log,
-                            void (*ctrl_func)(struct amd_iommu *iommu, int))
+                            void (*ctrl_func)(struct amd_iommu *iommu, bool))
 {
     u32 entry;
     int log_run, run_bit;
@@ -615,11 +565,11 @@ static void iommu_check_event_log(struct
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
     else
     {
-        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
+        if ( !iommu->ctrl.event_int_en )
         {
-            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
-            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            iommu->ctrl.event_int_en = true;
+            writeq(iommu->ctrl.raw,
+                   iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
             /*
              * Re-schedule the tasklet to handle eventual log entries added
              * between reading the log above and re-enabling the interrupt.
@@ -704,11 +654,11 @@ static void iommu_check_ppr_log(struct a
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
     else
     {
-        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-        if ( !(entry & IOMMU_CONTROL_PPR_LOG_INT_MASK) )
+        if ( !iommu->ctrl.ppr_int_en )
         {
-            entry |= IOMMU_CONTROL_PPR_LOG_INT_MASK;
-            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            iommu->ctrl.ppr_int_en = true;
+            writeq(iommu->ctrl.raw,
+                   iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
             /*
              * Re-schedule the tasklet to handle eventual log entries added
              * between reading the log above and re-enabling the interrupt.
@@ -754,7 +704,6 @@ static void do_amd_iommu_irq(unsigned lo
 static void iommu_interrupt_handler(int irq, void *dev_id,
                                     struct cpu_user_regs *regs)
 {
-    u32 entry;
     unsigned long flags;
     struct amd_iommu *iommu = dev_id;
 
@@ -764,10 +713,9 @@ static void iommu_interrupt_handler(int
      * Silence interrupts from both event and PPR by clearing the
      * enable logging bits in the control register
      */
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.event_int_en = false;
+    iommu->ctrl.ppr_int_en = false;
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -88,6 +88,8 @@ struct amd_iommu {
     void *mmio_base;
     unsigned long mmio_base_phys;
 
+    union amd_iommu_control ctrl;
+
     struct table_struct dev_table;
     struct ring_buffer cmd_buffer;
     struct ring_buffer event_log;
@@ -173,7 +175,7 @@ struct guest_iommu {
     uint64_t                mmio_base;             /* MMIO base address */
 
     /* MMIO regs */
-    struct mmio_reg         reg_ctrl;              /* MMIO offset 0018h */
+    union amd_iommu_control reg_ctrl;              /* MMIO offset 0018h */
     struct mmio_reg         reg_status;            /* MMIO offset 2020h */
     union amd_iommu_ext_features reg_ext_feature;  /* MMIO offset 0030h */
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -295,38 +295,56 @@ struct amd_iommu_dte {
 
 /* Control Register */
 #define IOMMU_CONTROL_MMIO_OFFSET			0x18
-#define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK		0x00000001
-#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT		0
-#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK	0x00000002
-#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT	1
-#define IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK		0x00000004
-#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT		2
-#define IOMMU_CONTROL_EVENT_LOG_INT_MASK		0x00000008
-#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT		3
-#define IOMMU_CONTROL_COMP_WAIT_INT_MASK		0x00000010
-#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT		4
-#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_MASK		0x000000E0
-#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT	5
-#define IOMMU_CONTROL_PASS_POSTED_WRITE_MASK		0x00000100
-#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT		8
-#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK	0x00000200
-#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT	9
-#define IOMMU_CONTROL_COHERENT_MASK			0x00000400
-#define IOMMU_CONTROL_COHERENT_SHIFT			10
-#define IOMMU_CONTROL_ISOCHRONOUS_MASK			0x00000800
-#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
-#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
-#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_MASK		0x00002000
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
-#define IOMMU_CONTROL_PPR_LOG_INT_MASK			0x00004000
-#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
-#define IOMMU_CONTROL_PPR_ENABLE_MASK			0x00008000
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
-#define IOMMU_CONTROL_GT_ENABLE_MASK			0x00010000
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
-#define IOMMU_CONTROL_RESTART_MASK			0x80000000
-#define IOMMU_CONTROL_RESTART_SHIFT			31
+
+union amd_iommu_control {
+    uint64_t raw;
+    struct {
+        bool iommu_en:1;
+        bool ht_tun_en:1;
+        bool event_log_en:1;
+        bool event_int_en:1;
+        bool com_wait_int_en:1;
+        unsigned int inv_timeout:3;
+        bool pass_pw:1;
+        bool res_pass_pw:1;
+        bool coherent:1;
+        bool isoc:1;
+        bool cmd_buf_en:1;
+        bool ppr_log_en:1;
+        bool ppr_int_en:1;
+        bool ppr_en:1;
+        bool gt_en:1;
+        bool ga_en:1;
+        unsigned int crw:4;
+        bool smif_en:1;
+        bool slf_wb_dis:1;
+        bool smif_log_en:1;
+        unsigned int gam_en:3;
+        bool ga_log_en:1;
+        bool ga_int_en:1;
+        unsigned int dual_ppr_log_en:2;
+        unsigned int dual_event_log_en:2;
+        unsigned int dev_tbl_seg_en:3;
+        unsigned int priv_abrt_en:2;
+        bool ppr_auto_rsp_en:1;
+        bool marc_en:1;
+        bool blk_stop_mrk_en:1;
+        bool ppr_auto_rsp_aon:1;
+        bool domain_id_pne:1;
+        unsigned int :1;
+        bool eph_en:1;
+        unsigned int had_update:2;
+        bool gd_update_dis:1;
+        unsigned int :1;
+        bool xt_en:1;
+        bool int_cap_xt_en:1;
+        bool vcmd_en:1;
+        bool viommu_en:1;
+        bool ga_update_dis:1;
+        bool gappi_en:1;
+        unsigned int :8;
+    };
+};
 
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
AMD/IOMMU: use bit field for IRTE

At the same time restrict its scope to just the single source file
actually using it, and abstract accesses by introducing a union of
pointers. (A union of the actual table entries is deliberately not used,
so as to make it impossible to [wrongly, once the 128-bit form gets
added] perform pointer arithmetic / array accesses on derived types.)

Also move away from updating the entries piecemeal: Construct a full new
entry, and write it out.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v4: Re-base. Do away with standalone struct irte_basic.
v3: Switch boolean bitfields to bool.
v2: name {get,free}_intremap_entry()'s last parameter "index" instead of
    "offset". Introduce union irte32.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -24,6 +24,26 @@
 #include <xen/keyhandler.h>
 #include <xen/softirq.h>
 
+union irte32 {
+    uint32_t raw;
+    struct {
+        bool remap_en:1;
+        bool sup_io_pf:1;
+        unsigned int int_type:3;
+        bool rq_eoi:1;
+        bool dm:1;
+        bool guest_mode:1; /* MBZ */
+        unsigned int dest:8;
+        unsigned int vector:8;
+        unsigned int :8;
+    } flds;
+};
+
+union irte_ptr {
+    void *ptr;
+    union irte32 *ptr32;
+};
+
 #define INTREMAP_TABLE_ORDER    1
 #define INTREMAP_LENGTH 0xB
 #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
@@ -102,47 +122,45 @@ static unsigned int alloc_intremap_entry
     return slot;
 }
 
-static u32 *get_intremap_entry(int seg, int bdf, int offset)
+static union irte_ptr get_intremap_entry(unsigned int seg, unsigned int bdf,
+                                         unsigned int index)
 {
-    u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
+    union irte_ptr table = {
+        .ptr = get_ivrs_mappings(seg)[bdf].intremap_table
+    };
+
+    ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
 
-    ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
+    table.ptr32 += index;
 
-    return table + offset;
+    return table;
 }
 
-static void free_intremap_entry(int seg, int bdf, int offset)
-{
-    u32 *entry = get_intremap_entry(seg, bdf, offset);
-
-    memset(entry, 0, sizeof(u32));
-    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
-}
-
-static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
-    u8 dest_mode, u8 dest)
-{
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                            INT_REMAP_ENTRY_REMAPEN_MASK,
-                            INT_REMAP_ENTRY_REMAPEN_SHIFT, entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, *entry,
-                            INT_REMAP_ENTRY_SUPIOPF_MASK,
-                            INT_REMAP_ENTRY_SUPIOPF_SHIFT, entry);
-    set_field_in_reg_u32(int_type, *entry,
-                            INT_REMAP_ENTRY_INTTYPE_MASK,
-                            INT_REMAP_ENTRY_INTTYPE_SHIFT, entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, *entry,
-                            INT_REMAP_ENTRY_REQEOI_MASK,
-                            INT_REMAP_ENTRY_REQEOI_SHIFT, entry);
-    set_field_in_reg_u32((u32)dest_mode, *entry,
-                            INT_REMAP_ENTRY_DM_MASK,
-                            INT_REMAP_ENTRY_DM_SHIFT, entry);
-    set_field_in_reg_u32((u32)dest, *entry,
-                            INT_REMAP_ENTRY_DEST_MAST,
-                            INT_REMAP_ENTRY_DEST_SHIFT, entry);
-    set_field_in_reg_u32((u32)vector, *entry,
-                            INT_REMAP_ENTRY_VECTOR_MASK,
-                            INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
+static void free_intremap_entry(unsigned int seg, unsigned int bdf,
+                                unsigned int index)
+{
+    union irte_ptr entry = get_intremap_entry(seg, bdf, index);
+
+    ACCESS_ONCE(entry.ptr32->raw) = 0;
+
+    __clear_bit(index, get_ivrs_mappings(seg)[bdf].intremap_inuse);
+}
+
+static void update_intremap_entry(union irte_ptr entry, unsigned int vector,
+                                  unsigned int int_type,
+                                  unsigned int dest_mode, unsigned int dest)
+{
+    union irte32 irte = {
+        .flds = {
+            .remap_en = true,
+            .int_type = int_type,
+            .dm = dest_mode,
+            .dest = dest,
+            .vector = vector,
+        },
+    };
+
+    ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
 }
 
 static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -164,7 +182,7 @@ static int update_intremap_entry_from_io
     u16 *index)
 {
     unsigned long flags;
-    u32* entry;
+    union irte_ptr entry;
     u8 delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
@@ -202,12 +220,8 @@ static int update_intremap_entry_from_io
          * so need to recover vector and delivery mode from IRTE.
          */
         ASSERT(get_rte_index(rte) == offset);
-        vector = get_field_from_reg_u32(*entry,
-                                        INT_REMAP_ENTRY_VECTOR_MASK,
-                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
-        delivery_mode = get_field_from_reg_u32(*entry,
-                                               INT_REMAP_ENTRY_INTTYPE_MASK,
-                                               INT_REMAP_ENTRY_INTTYPE_SHIFT);
+        vector = entry.ptr32->flds.vector;
+        delivery_mode = entry.ptr32->flds.int_type;
     }
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
@@ -229,7 +243,7 @@ int __init amd_iommu_setup_ioapic_remapp
 {
     struct IO_APIC_route_entry rte;
     unsigned long flags;
-    u32* entry;
+    union irte_ptr entry;
     int apic, pin;
     u8 delivery_mode, dest, vector, dest_mode;
     u16 seg, bdf, req_id;
@@ -408,16 +422,14 @@ unsigned int amd_iommu_read_ioapic_from_
         u16 bdf = ioapic_sbdf[idx].bdf;
         u16 seg = ioapic_sbdf[idx].seg;
         u16 req_id = get_intremap_requestor_id(seg, bdf);
-        const u32 *entry = get_intremap_entry(seg, req_id, offset);
+        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
 
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
         val &= ~(INTREMAP_ENTRIES - 1);
-        val |= get_field_from_reg_u32(*entry,
-                                      INT_REMAP_ENTRY_INTTYPE_MASK,
-                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
-        val |= get_field_from_reg_u32(*entry,
-                                      INT_REMAP_ENTRY_VECTOR_MASK,
-                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
+        val |= MASK_INSR(entry.ptr32->flds.int_type,
+                         IO_APIC_REDIR_DELIV_MODE_MASK);
+        val |= MASK_INSR(entry.ptr32->flds.vector,
+                         IO_APIC_REDIR_VECTOR_MASK);
     }
 
     return val;
@@ -428,7 +440,7 @@ static int update_intremap_entry_from_ms
     int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
-    u32* entry;
+    union irte_ptr entry;
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
@@ -582,7 +594,7 @@ void amd_iommu_read_msi_from_ire(
     const struct pci_dev *pdev = msi_desc->dev;
     u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
-    const u32 *entry;
+    union irte_ptr entry;
 
     if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
         return;
@@ -598,12 +610,10 @@ void amd_iommu_read_msi_from_ire(
     }
 
     msg->data &= ~(INTREMAP_ENTRIES - 1);
-    msg->data |= get_field_from_reg_u32(*entry,
-                                        INT_REMAP_ENTRY_INTTYPE_MASK,
-                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
-    msg->data |= get_field_from_reg_u32(*entry,
-                                        INT_REMAP_ENTRY_VECTOR_MASK,
-                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
+    msg->data |= MASK_INSR(entry.ptr32->flds.int_type,
+                           MSI_DATA_DELIVERY_MODE_MASK);
+    msg->data |= MASK_INSR(entry.ptr32->flds.vector,
+                           MSI_DATA_VECTOR_MASK);
 }
 
 int __init amd_iommu_free_intremap_table(
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -469,22 +469,6 @@ struct amd_iommu_pte {
 #define IOMMU_CONTROL_DISABLED	0
 #define IOMMU_CONTROL_ENABLED	1
 
-/* interrupt remapping table */
-#define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
-#define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
-#define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
-#define INT_REMAP_ENTRY_SUPIOPF_SHIFT   1
-#define INT_REMAP_ENTRY_INTTYPE_MASK    0x0000001C
-#define INT_REMAP_ENTRY_INTTYPE_SHIFT   2
-#define INT_REMAP_ENTRY_REQEOI_MASK     0x00000020
-#define INT_REMAP_ENTRY_REQEOI_SHIFT    5
-#define INT_REMAP_ENTRY_DM_MASK         0x00000040
-#define INT_REMAP_ENTRY_DM_SHIFT        6
-#define INT_REMAP_ENTRY_DEST_MAST       0x0000FF00
-#define INT_REMAP_ENTRY_DEST_SHIFT      8
-#define INT_REMAP_ENTRY_VECTOR_MASK     0x00FF0000
-#define INT_REMAP_ENTRY_VECTOR_SHIFT    16
-
 #define INV_IOMMU_ALL_PAGES_ADDRESS      ((1ULL << 63) - 1)
 
 #define IOMMU_RING_BUFFER_PTR_MASK                  0x0007FFF0
AMD/IOMMU: pass IOMMU to {get,free,update}_intremap_entry()

The functions will want to know IOMMU properties (specifically the IRTE
size) subsequently.

Rather than introducing a second error path bogusly returning -E... from
amd_iommu_read_ioapic_from_ire(), also change the existing one to follow
VT-d in returning the raw (untranslated) IO-APIC RTE.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v3: New.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -122,11 +122,11 @@ static unsigned int alloc_intremap_entry
     return slot;
 }
 
-static union irte_ptr get_intremap_entry(unsigned int seg, unsigned int bdf,
-                                         unsigned int index)
+static union irte_ptr get_intremap_entry(const struct amd_iommu *iommu,
+                                         unsigned int bdf, unsigned int index)
 {
     union irte_ptr table = {
-        .ptr = get_ivrs_mappings(seg)[bdf].intremap_table
+        .ptr = get_ivrs_mappings(iommu->seg)[bdf].intremap_table
     };
 
     ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
@@ -136,18 +136,19 @@ static union irte_ptr get_intremap_entry
     return table;
 }
 
-static void free_intremap_entry(unsigned int seg, unsigned int bdf,
-                                unsigned int index)
+static void free_intremap_entry(const struct amd_iommu *iommu,
+                                unsigned int bdf, unsigned int index)
 {
-    union irte_ptr entry = get_intremap_entry(seg, bdf, index);
+    union irte_ptr entry = get_intremap_entry(iommu, bdf, index);
 
     ACCESS_ONCE(entry.ptr32->raw) = 0;
 
-    __clear_bit(index, get_ivrs_mappings(seg)[bdf].intremap_inuse);
+    __clear_bit(index, get_ivrs_mappings(iommu->seg)[bdf].intremap_inuse);
 }
 
-static void update_intremap_entry(union irte_ptr entry, unsigned int vector,
-                                  unsigned int int_type,
+static void update_intremap_entry(const struct amd_iommu *iommu,
+                                  union irte_ptr entry,
+                                  unsigned int vector, unsigned int int_type,
                                   unsigned int dest_mode, unsigned int dest)
 {
     union irte32 irte = {
@@ -212,7 +213,7 @@ static int update_intremap_entry_from_io
         lo_update = 1;
     }
 
-    entry = get_intremap_entry(iommu->seg, req_id, offset);
+    entry = get_intremap_entry(iommu, req_id, offset);
     if ( !lo_update )
     {
         /*
@@ -223,7 +224,7 @@ static int update_intremap_entry_from_io
         vector = entry.ptr32->flds.vector;
         delivery_mode = entry.ptr32->flds.int_type;
     }
-    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+    update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
 
@@ -288,8 +289,8 @@ int __init amd_iommu_setup_ioapic_remapp
             spin_lock_irqsave(lock, flags);
             offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
-            entry = get_intremap_entry(iommu->seg, req_id, offset);
-            update_intremap_entry(entry, vector,
+            entry = get_intremap_entry(iommu, req_id, offset);
+            update_intremap_entry(iommu, entry, vector,
                                   delivery_mode, dest_mode, dest);
             spin_unlock_irqrestore(lock, flags);
 
@@ -413,7 +414,7 @@ unsigned int amd_iommu_read_ioapic_from_
 
     idx = ioapic_id_to_index(IO_APIC_ID(apic));
     if ( idx == MAX_IO_APICS )
-        return -EINVAL;
+        return val;
 
     offset = ioapic_sbdf[idx].pin_2_idx[pin];
 
@@ -422,9 +423,13 @@ unsigned int amd_iommu_read_ioapic_from_
         u16 bdf = ioapic_sbdf[idx].bdf;
         u16 seg = ioapic_sbdf[idx].seg;
         u16 req_id = get_intremap_requestor_id(seg, bdf);
-        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
+        const struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
+        union irte_ptr entry;
 
+        if ( !iommu )
+            return val;
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
+        entry = get_intremap_entry(iommu, req_id, offset);
         val &= ~(INTREMAP_ENTRIES - 1);
         val |= MASK_INSR(entry.ptr32->flds.int_type,
                          IO_APIC_REDIR_DELIV_MODE_MASK);
@@ -454,7 +459,7 @@ static int update_intremap_entry_from_ms
         lock = get_intremap_lock(iommu->seg, req_id);
         spin_lock_irqsave(lock, flags);
         for ( i = 0; i < nr; ++i )
-            free_intremap_entry(iommu->seg, req_id, *remap_index + i);
+            free_intremap_entry(iommu, req_id, *remap_index + i);
         spin_unlock_irqrestore(lock, flags);
         goto done;
     }
@@ -479,8 +484,8 @@ static int update_intremap_entry_from_ms
         *remap_index = offset;
     }
 
-    entry = get_intremap_entry(iommu->seg, req_id, offset);
-    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+    entry = get_intremap_entry(iommu, req_id, offset);
+    update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
     *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
@@ -594,12 +599,13 @@ void amd_iommu_read_msi_from_ire(
     const struct pci_dev *pdev = msi_desc->dev;
     u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+    const struct amd_iommu *iommu = _find_iommu_for_device(seg, bdf);
     union irte_ptr entry;
 
-    if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+    if ( IS_ERR_OR_NULL(iommu) )
         return;
 
-    entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+    entry = get_intremap_entry(iommu, get_dma_requestor_id(seg, bdf), offset);
 
     if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
     {
AMD/IOMMU: introduce 128-bit non-guest-APIC IRTE format

This is in preparation for actually enabling x2APIC mode, which requires
this wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): The bypass there (writing a masked RTE
straight to the IO-APIC when no IRTE has been allocated for the pin
yet) was introduced for XSA-36, i.e. by 94d4a1119d ("AMD,IOMMU: Clean
up old entries in remapping tables when creating new one"). Other code
introduced by that change has meanwhile disappeared or been changed
further, and I wonder if - rather than adding an x2apic_enabled check to
the conditional - the bypass couldn't be deleted altogether. For now the
goal is to affect the non-x2APIC paths as little as possible.

Take the liberty of using the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Re-base. Do away with standalone struct irte_full. Use smp_wmb().
v3: Avoid unrelated type changes in update_intremap_entry_from_ioapic().
    Drop irte_mode enum and variable. Convert INTREMAP_TABLE_ORDER into
    a static helper. Comment barrier() uses. Switch boolean bitfields to
    bool.
v2: Add cast in get_full_dest(). Re-base over changes earlier in the
    series. Don't use cmpxchg16b. Use barrier() instead of wmb().
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -39,12 +39,36 @@ union irte32 {
     } flds;
 };
 
+union irte128 {
+    uint64_t raw[2];
+    struct {
+        bool remap_en:1;
+        bool sup_io_pf:1;
+        unsigned int int_type:3;
+        bool rq_eoi:1;
+        bool dm:1;
+        bool guest_mode:1; /* MBZ */
+        unsigned int dest_lo:24;
+        unsigned int :32;
+        unsigned int vector:8;
+        unsigned int :24;
+        unsigned int :24;
+        unsigned int dest_hi:8;
+    } full;
+};
+
 union irte_ptr {
     void *ptr;
     union irte32 *ptr32;
+    union irte128 *ptr128;
 };
 
-#define INTREMAP_TABLE_ORDER    1
+union irte_cptr {
+    const void *ptr;
+    const union irte32 *ptr32;
+    const union irte128 *ptr128;
+} __transparent__;
+
 #define INTREMAP_LENGTH 0xB
 #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
 
@@ -57,6 +81,13 @@ unsigned int nr_ioapic_sbdf;
 
 static void dump_intremap_tables(unsigned char key);
 
+static unsigned int __init intremap_table_order(const struct amd_iommu *iommu)
+{
+    return iommu->ctrl.ga_en
+           ? get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte128))
+           : get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte32));
+}
+
 unsigned int ioapic_id_to_index(unsigned int apic_id)
 {
     unsigned int idx;
@@ -131,7 +162,10 @@ static union irte_ptr get_intremap_entry
 
     ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
 
-    table.ptr32 += index;
+    if ( iommu->ctrl.ga_en )
+        table.ptr128 += index;
+    else
+        table.ptr32 += index;
 
     return table;
 }
@@ -141,7 +175,22 @@ static void free_intremap_entry(const st
 {
     union irte_ptr entry = get_intremap_entry(iommu, bdf, index);
 
-    ACCESS_ONCE(entry.ptr32->raw) = 0;
+    if ( iommu->ctrl.ga_en )
+    {
+        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        /*
+         * Low half (containing RemapEn) needs to be cleared first.  Note that
+         * strictly speaking smp_wmb() isn't enough, as conceptually it expands
+         * to just barrier() when !CONFIG_SMP.  But wmb() would be more than we
+         * need, since the IOMMU is a cache-coherent entity on the bus.  And
+         * given that we don't allow CONFIG_SMP to be turned off, the SMP
+         * variant will do.
+         */
+        smp_wmb();
+        entry.ptr128->raw[1] = 0;
+    }
+    else
+        ACCESS_ONCE(entry.ptr32->raw) = 0;
 
     __clear_bit(index, get_ivrs_mappings(iommu->seg)[bdf].intremap_inuse);
 }
@@ -151,17 +200,44 @@ static void update_intremap_entry(const
                                   unsigned int vector, unsigned int int_type,
                                   unsigned int dest_mode, unsigned int dest)
 {
-    union irte32 irte = {
-        .flds = {
-            .remap_en = true,
-            .int_type = int_type,
-            .dm = dest_mode,
-            .dest = dest,
-            .vector = vector,
-        },
-    };
+    if ( iommu->ctrl.ga_en )
+    {
+        union irte128 irte = {
+            .full = {
+                .remap_en = true,
+                .int_type = int_type,
+                .dm = dest_mode,
+                .dest_lo = dest,
+                .dest_hi = dest >> 24,
+                .vector = vector,
+            },
+        };
+
+        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        /*
+         * Low half, in particular RemapEn, needs to be cleared first.  See
+         * comment in free_intremap_entry() regarding the choice of barrier.
+         */
+        smp_wmb();
+        entry.ptr128->raw[1] = irte.raw[1];
+        /* High half needs to be set before low one (containing RemapEn). */
+        smp_wmb();
+        ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
+    }
+    else
+    {
+        union irte32 irte = {
+            .flds = {
+                .remap_en = true,
+                .int_type = int_type,
+                .dm = dest_mode,
+                .dest = dest,
+                .vector = vector,
+            },
+        };
 
-    ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
+        ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
+    }
 }
 
 static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -175,6 +251,11 @@ static inline void set_rte_index(struct
     rte->delivery_mode = offset >> 8;
 }
 
+static inline unsigned int get_full_dest(const union irte128 *entry)
+{
+    return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
+}
+
 static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
@@ -184,10 +265,11 @@ static int update_intremap_entry_from_io
 {
     unsigned long flags;
     union irte_ptr entry;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
-    unsigned int offset;
+    unsigned int dest, offset;
+    bool fresh = false;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -195,7 +277,7 @@ static int update_intremap_entry_from_io
     delivery_mode = rte->delivery_mode;
     vector = rte->vector;
     dest_mode = rte->dest_mode;
-    dest = rte->dest.logical.logical_dest;
+    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
 
     spin_lock_irqsave(lock, flags);
 
@@ -210,25 +292,40 @@ static int update_intremap_entry_from_io
             return -ENOSPC;
         }
         *index = offset;
-        lo_update = 1;
+        fresh = true;
     }
 
     entry = get_intremap_entry(iommu, req_id, offset);
-    if ( !lo_update )
+    if ( fresh )
+        /* nothing */;
+    else if ( !lo_update )
     {
         /*
          * Low half of incoming RTE is already in remapped format,
          * so need to recover vector and delivery mode from IRTE.
          */
         ASSERT(get_rte_index(rte) == offset);
-        vector = entry.ptr32->flds.vector;
+        if ( iommu->ctrl.ga_en )
+            vector = entry.ptr128->full.vector;
+        else
+            vector = entry.ptr32->flds.vector;
+        /* The IntType fields match for both formats. */
         delivery_mode = entry.ptr32->flds.int_type;
     }
+    else if ( x2apic_enabled )
+    {
+        /*
+         * High half of incoming RTE was read from the I/O APIC and hence may
+         * not hold the full destination, so need to recover full destination
+         * from IRTE.
+         */
+        dest = get_full_dest(entry.ptr128);
+    }
     update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled )
+    if ( iommu->enabled && !fresh )
     {
         spin_lock_irqsave(&iommu->lock, flags);
         amd_iommu_flush_intremap(iommu, req_id);
@@ -286,6 +383,18 @@ int __init amd_iommu_setup_ioapic_remapp
             dest_mode = rte.dest_mode;
             dest = rte.dest.logical.logical_dest;
 
+            if ( iommu->ctrl.xt_en )
+            {
+                /*
+                 * In x2APIC mode we have no way of discovering the high 24
+                 * bits of the destination of an already enabled interrupt.
+                 * We come here earlier than for xAPIC mode, so no interrupts
+                 * should have been set up before.
+                 */
+                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+                                IO_APIC_ID(apic), pin);
+            }
+
             spin_lock_irqsave(lock, flags);
             offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -320,7 +429,8 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf, rc;
+    int seg, bdf, rc;
+    bool saved_mask, fresh = false;
     struct amd_iommu *iommu;
     unsigned int idx;
 
@@ -362,12 +472,22 @@ void amd_iommu_ioapic_update_ire(
         *(((u32 *)&new_rte) + 1) = value;
     }
 
-    if ( new_rte.mask &&
-         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
-        __io_apic_write(apic, reg, value);
-        return;
+
+        /*
+         * There's nowhere except the IRTE to store a full 32-bit destination,
+         * so we may not bypass entry allocation and updating of the low RTE
+         * half in the (usual) case of the high RTE half getting written first.
+         */
+        if ( new_rte.mask && !x2apic_enabled )
+        {
+            __io_apic_write(apic, reg, value);
+            return;
+        }
+
+        fresh = true;
     }
 
     /* mask the interrupt while we change the intremap table */
@@ -396,8 +516,12 @@ void amd_iommu_ioapic_update_ire(
     if ( reg == rte_lo )
         return;
 
-    /* unmask the interrupt after we have updated the intremap table */
-    if ( !saved_mask )
+    /*
+     * Unmask the interrupt after we have updated the intremap table. Also
+     * write the low half if a fresh entry was allocated for a high half
+     * update in x2APIC mode.
+     */
+    if ( !saved_mask || (x2apic_enabled && fresh) )
     {
         old_rte.mask = saved_mask;
         __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -411,31 +535,40 @@ unsigned int amd_iommu_read_ioapic_from_
     unsigned int offset;
     unsigned int val = __io_apic_read(apic, reg);
     unsigned int pin = (reg - 0x10) / 2;
+    uint16_t seg, bdf, req_id;
+    const struct amd_iommu *iommu;
+    union irte_ptr entry;
 
     idx = ioapic_id_to_index(IO_APIC_ID(apic));
     if ( idx == MAX_IO_APICS )
         return val;
 
     offset = ioapic_sbdf[idx].pin_2_idx[pin];
+    if ( offset >= INTREMAP_ENTRIES )
+        return val;
 
-    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
-    {
-        u16 bdf = ioapic_sbdf[idx].bdf;
-        u16 seg = ioapic_sbdf[idx].seg;
-        u16 req_id = get_intremap_requestor_id(seg, bdf);
-        const struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-        union irte_ptr entry;
+    seg = ioapic_sbdf[idx].seg;
+    bdf = ioapic_sbdf[idx].bdf;
+    iommu = find_iommu_for_device(seg, bdf);
+    if ( !iommu )
+        return val;
+    req_id = get_intremap_requestor_id(seg, bdf);
+    entry = get_intremap_entry(iommu, req_id, offset);
 
-        if ( !iommu )
-            return val;
+    if ( !(reg & 1) )
+    {
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
-        entry = get_intremap_entry(iommu, req_id, offset);
         val &= ~(INTREMAP_ENTRIES - 1);
+        /* The IntType fields match for both formats. */
         val |= MASK_INSR(entry.ptr32->flds.int_type,
                          IO_APIC_REDIR_DELIV_MODE_MASK);
-        val |= MASK_INSR(entry.ptr32->flds.vector,
+        val |= MASK_INSR(iommu->ctrl.ga_en
+                         ? entry.ptr128->full.vector
+                         : entry.ptr32->flds.vector,
                          IO_APIC_REDIR_VECTOR_MASK);
     }
+    else if ( x2apic_enabled )
+        val = get_full_dest(entry.ptr128);
 
     return val;
 }
@@ -447,9 +580,9 @@ static int update_intremap_entry_from_ms
     unsigned long flags;
     union irte_ptr entry;
     u16 req_id, alias_id;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset, i;
+    unsigned int dest, offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -470,7 +603,12 @@ static int update_intremap_entry_from_ms
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
-    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+    if ( x2apic_enabled )
+        dest = msg->dest32;
+    else
+        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
@@ -616,10 +754,21 @@ void amd_iommu_read_msi_from_ire(
     }
 
     msg->data &= ~(INTREMAP_ENTRIES - 1);
+    /* The IntType fields match for both formats. */
     msg->data |= MASK_INSR(entry.ptr32->flds.int_type,
                            MSI_DATA_DELIVERY_MODE_MASK);
-    msg->data |= MASK_INSR(entry.ptr32->flds.vector,
-                           MSI_DATA_VECTOR_MASK);
+    if ( iommu->ctrl.ga_en )
+    {
+        msg->data |= MASK_INSR(entry.ptr128->full.vector,
+                               MSI_DATA_VECTOR_MASK);
+        msg->dest32 = get_full_dest(entry.ptr128);
+    }
+    else
+    {
+        msg->data |= MASK_INSR(entry.ptr32->flds.vector,
+                               MSI_DATA_VECTOR_MASK);
+        msg->dest32 = entry.ptr32->flds.dest;
+    }
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -631,7 +780,7 @@ int __init amd_iommu_free_intremap_table
 
     if ( tb )
     {
-        __free_amd_iommu_tables(tb, INTREMAP_TABLE_ORDER);
+        __free_amd_iommu_tables(tb, intremap_table_order(iommu));
         ivrs_mapping->intremap_table = NULL;
     }
 
@@ -641,10 +790,10 @@ int __init amd_iommu_free_intremap_table
 void *__init amd_iommu_alloc_intremap_table(
     const struct amd_iommu *iommu, unsigned long **inuse_map)
 {
-    void *tb;
-    tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
+    void *tb = __alloc_amd_iommu_tables(intremap_table_order(iommu));
+
     BUG_ON(tb == NULL);
-    memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    memset(tb, 0, PAGE_SIZE << intremap_table_order(iommu));
     *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
     BUG_ON(*inuse_map == NULL);
     return tb;
@@ -685,18 +834,29 @@ int __init amd_setup_hpet_msi(struct msi
     return rc;
 }
 
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const struct amd_iommu *iommu,
+                                union irte_cptr tbl)
 {
-    u32 count;
+    unsigned int count;
 
-    if ( !table )
+    if ( !tbl.ptr )
         return;
 
     for ( count = 0; count < INTREMAP_ENTRIES; count++ )
     {
-        if ( !table[count] )
-            continue;
-        printk("    IRTE[%03x] %08x\n", count, table[count]);
+        if ( iommu->ctrl.ga_en )
+        {
+            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
+                continue;
+            printk("    IRTE[%03x] %016lx_%016lx\n",
+                   count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
+        }
+        else
+        {
+            if ( !tbl.ptr32[count].raw )
+                continue;
+            printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
+        }
     }
 }
 
@@ -714,7 +874,7 @@ static int dump_intremap_mapping(const s
            PCI_FUNC(ivrs_mapping->dte_requestor_id));
 
     spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
-    dump_intremap_table(ivrs_mapping->intremap_table);
+    dump_intremap_table(iommu, ivrs_mapping->intremap_table);
     spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
 
     process_pending_softirqs();
@@ -733,6 +893,8 @@ static void dump_intremap_tables(unsigne
     printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
 
     spin_lock_irqsave(&shared_intremap_lock, flags);
-    dump_intremap_table(shared_intremap_table);
+    dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
+                                         list),
+                        shared_intremap_table);
     spin_unlock_irqrestore(&shared_intremap_lock, flags);
 }
AMD/IOMMU: split amd_iommu_init_one()

Mapping the MMIO space and obtaining feature information need to happen
slightly earlier, such that for x2APIC support we can set XTEn prior to
calling amd_iommu_update_ivrs_mapping_acpi() and
amd_iommu_setup_ioapic_remapping().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -969,14 +969,6 @@ static void * __init allocate_ppr_log(st
 
 static int __init amd_iommu_init_one(struct amd_iommu *iommu)
 {
-    if ( map_iommu_mmio_region(iommu) != 0 )
-        goto error_out;
-
-    get_iommu_features(iommu);
-
-    if ( iommu->features.raw )
-        iommuv2_enabled = 1;
-
     if ( allocate_cmd_buffer(iommu) == NULL )
         goto error_out;
 
@@ -1201,6 +1193,23 @@ static bool_t __init amd_sp5100_erratum2
     return 0;
 }
 
+static int __init amd_iommu_prepare_one(struct amd_iommu *iommu)
+{
+    int rc = alloc_ivrs_mappings(iommu->seg);
+
+    if ( !rc )
+        rc = map_iommu_mmio_region(iommu);
+    if ( rc )
+        return rc;
+
+    get_iommu_features(iommu);
+
+    if ( iommu->features.raw )
+        iommuv2_enabled = true;
+
+    return 0;
+}
+
 int __init amd_iommu_init(void)
 {
     struct amd_iommu *iommu;
@@ -1231,7 +1240,7 @@ int __init amd_iommu_init(void)
     radix_tree_init(&ivrs_maps);
     for_each_amd_iommu ( iommu )
     {
-        rc = alloc_ivrs_mappings(iommu->seg);
+        rc = amd_iommu_prepare_one(iommu);
         if ( rc )
             goto error_out;
     }
AMD/IOMMU: allow enabling with IRQ not yet set up

Early enabling (to enter x2APIC mode) requires deferring of the IRQ
setup. Code to actually do that setup in the x2APIC case will get added
subsequently.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v3: Re-base.

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -813,7 +813,6 @@ static void amd_iommu_erratum_746_workar
 static void enable_iommu(struct amd_iommu *iommu)
 {
     unsigned long flags;
-    struct irq_desc *desc;
 
     spin_lock_irqsave(&iommu->lock, flags);
 
@@ -833,19 +832,27 @@ static void enable_iommu(struct amd_iomm
     if ( iommu->features.flds.ppr_sup )
         register_iommu_ppr_log_in_mmio_space(iommu);
 
-    desc = irq_to_desc(iommu->msi.irq);
-    spin_lock(&desc->lock);
-    set_msi_affinity(desc, NULL);
-    spin_unlock(&desc->lock);
+    if ( iommu->msi.irq > 0 )
+    {
+        struct irq_desc *desc = irq_to_desc(iommu->msi.irq);
+
+        spin_lock(&desc->lock);
+        set_msi_affinity(desc, NULL);
+        spin_unlock(&desc->lock);
+    }
 
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 
     set_iommu_ht_flags(iommu);
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
-    set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
 
-    if ( iommu->features.flds.ppr_sup )
-        set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    if ( iommu->msi.irq > 0 )
+    {
+        set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
+
+        if ( iommu->features.flds.ppr_sup )
+            set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    }
 
     if ( iommu->features.flds.gt_sup )
         set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED);
AMD/IOMMU: adjust setup of internal interrupt for x2APIC mode

In order to be able to express all possible destinations we need to make
use of this non-MSI-capability based mechanism. The new IRQ controller
structure can re-use certain MSI functions, though.

For now general and PPR interrupts still share a single vector, IRQ, and
hence handler.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v4: Re-base.
v3: Re-base.

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -472,6 +472,44 @@ static hw_irq_controller iommu_maskable_
     .set_affinity = set_msi_affinity,
 };
 
+static void set_x2apic_affinity(struct irq_desc *desc, const cpumask_t *mask)
+{
+    struct amd_iommu *iommu = desc->action->dev_id;
+    unsigned int dest = set_desc_affinity(desc, mask);
+    union amd_iommu_x2apic_control ctrl = {};
+    unsigned long flags;
+
+    if ( dest == BAD_APICID )
+        return;
+
+    msi_compose_msg(desc->arch.vector, NULL, &iommu->msi.msg);
+    iommu->msi.msg.dest32 = dest;
+
+    ctrl.dest_mode = MASK_EXTR(iommu->msi.msg.address_lo,
+                               MSI_ADDR_DESTMODE_MASK);
+    ctrl.int_type = MASK_EXTR(iommu->msi.msg.data,
+                              MSI_DATA_DELIVERY_MODE_MASK);
+    ctrl.vector = desc->arch.vector;
+    ctrl.dest_lo = dest;
+    ctrl.dest_hi = dest >> 24;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    writeq(ctrl.raw, iommu->mmio_base + IOMMU_XT_INT_CTRL_MMIO_OFFSET);
+    writeq(ctrl.raw, iommu->mmio_base + IOMMU_XT_PPR_INT_CTRL_MMIO_OFFSET);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static hw_irq_controller iommu_x2apic_type = {
+    .typename     = "IOMMU-x2APIC",
+    .startup      = irq_startup_none,
+    .shutdown     = irq_shutdown_none,
+    .enable       = irq_enable_none,
+    .disable      = irq_disable_none,
+    .ack          = ack_nonmaskable_msi_irq,
+    .end          = end_nonmaskable_msi_irq,
+    .set_affinity = set_x2apic_affinity,
+};
+
 static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
 {
     u16 domain_id, device_id, flags;
@@ -726,8 +764,6 @@ static void iommu_interrupt_handler(int
 static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int irq, ret;
-    hw_irq_controller *handler;
-    u16 control;
 
     irq = create_irq(NUMA_NO_NODE);
     if ( irq <= 0 )
@@ -747,19 +783,42 @@ static bool_t __init set_iommu_interrupt
                         PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
         return 0;
     }
-    control = pci_conf_read16(PCI_SBDF2(iommu->seg, iommu->bdf),
-                              iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
-    iommu->msi.msi.nvec = 1;
-    if ( is_mask_bit_support(control) )
-    {
-        iommu->msi.msi_attrib.maskbit = 1;
-        iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
-                                                is_64bit_address(control));
-        handler = &iommu_maskable_msi_type;
+
+    if ( iommu->ctrl.int_cap_xt_en )
+    {
+        struct irq_desc *desc = irq_to_desc(irq);
+
+        iommu->msi.msi_attrib.pos = MSI_TYPE_IOMMU;
+        iommu->msi.msi_attrib.maskbit = 0;
+        iommu->msi.msi_attrib.is_64 = 1;
+
+        desc->msi_desc = &iommu->msi;
+        desc->handler = &iommu_x2apic_type;
+
+        ret = 0;
     }
     else
-        handler = &iommu_msi_type;
-    ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
+    {
+        hw_irq_controller *handler;
+        u16 control;
+
+        control = pci_conf_read16(PCI_SBDF2(iommu->seg, iommu->bdf),
+                                  iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
+
+        iommu->msi.msi.nvec = 1;
+        if ( is_mask_bit_support(control) )
+        {
+            iommu->msi.msi_attrib.maskbit = 1;
+            iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
+                                                    is_64bit_address(control));
+            handler = &iommu_maskable_msi_type;
+        }
+        else
+            handler = &iommu_msi_type;
+
+        ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
+    }
+
     if ( !ret )
         ret = request_irq(irq, 0, iommu_interrupt_handler, "amd_iommu", iommu);
     if ( ret )
@@ -837,8 +896,19 @@ static void enable_iommu(struct amd_iomm
         struct irq_desc *desc = irq_to_desc(iommu->msi.irq);
 
         spin_lock(&desc->lock);
-        set_msi_affinity(desc, NULL);
-        spin_unlock(&desc->lock);
+
+        if ( iommu->ctrl.int_cap_xt_en )
+        {
+            set_x2apic_affinity(desc, NULL);
+            spin_unlock(&desc->lock);
+        }
+        else
+        {
+            set_msi_affinity(desc, NULL);
+            spin_unlock(&desc->lock);
+
+            amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+        }
     }
 
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
@@ -878,7 +948,9 @@ static void disable_iommu(struct amd_iom
         return;
     }
 
-    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
+    if ( !iommu->ctrl.int_cap_xt_en )
+        amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
+
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED);
     set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED);
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -416,6 +416,25 @@ union amd_iommu_ext_features {
     } flds;
 };
 
+/* x2APIC Control Registers */
+#define IOMMU_XT_INT_CTRL_MMIO_OFFSET		0x0170
+#define IOMMU_XT_PPR_INT_CTRL_MMIO_OFFSET	0x0178
+#define IOMMU_XT_GA_INT_CTRL_MMIO_OFFSET	0x0180
+
+union amd_iommu_x2apic_control {
+    uint64_t raw;
+    struct {
+        unsigned int :2;
+        unsigned int dest_mode:1;
+        unsigned int :5;
+        unsigned int dest_lo:24;
+        unsigned int vector:8;
+        unsigned int int_type:1; /* DM in IOMMU spec 3.04 */
+        unsigned int :15;
+        unsigned int dest_hi:8;
+    };
+};
+
 /* Status Register*/
 #define IOMMU_STATUS_MMIO_OFFSET		0x2020
 #define IOMMU_STATUS_EVENT_OVERFLOW_MASK	0x00000001
AMD/IOMMU: enable x2APIC mode when available

In order for the CPUs to use x2APIC mode, the IOMMU(s) first need to be
switched into a suitable state.

The post-AP-bringup IRQ affinity adjustment is done also for the non-
x2APIC case, matching what VT-d does.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v4: Re-base.
v3: Set GAEn (and other control register bits) earlier. Also clear the
    bits enabled here in amd_iommu_init_cleanup(). Re-base. Pass NULL
    CPU mask to set_{x2apic,msi}_affinity().
v2: Drop cpu_has_cx16 check. Add comment.
---
TBD: Instead of the system_state check in iov_enable_xt() the function
     could also zap its own hook pointer, at which point it could also
     become __init. This would, however, require that either
     resume_x2apic() be bound to ignore iommu_enable_x2apic() errors
     forever, or that iommu_enable_x2apic() be slightly re-arranged to
     not return -EOPNOTSUPP when finding a NULL hook during resume.

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -833,6 +833,30 @@ static bool_t __init set_iommu_interrupt
     return 1;
 }
 
+int iov_adjust_irq_affinities(void)
+{
+    const struct amd_iommu *iommu;
+
+    if ( !iommu_enabled )
+        return 0;
+
+    for_each_amd_iommu ( iommu )
+    {
+        struct irq_desc *desc = irq_to_desc(iommu->msi.irq);
+        unsigned long flags;
+
+        spin_lock_irqsave(&desc->lock, flags);
+        if ( iommu->ctrl.int_cap_xt_en )
+            set_x2apic_affinity(desc, NULL);
+        else
+            set_msi_affinity(desc, NULL);
+        spin_unlock_irqrestore(&desc->lock, flags);
+    }
+
+    return 0;
+}
+__initcall(iov_adjust_irq_affinities);
+
 /*
  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
  * Workaround:
@@ -1046,7 +1070,7 @@ static void * __init allocate_ppr_log(st
                                 IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log");
 }
 
-static int __init amd_iommu_init_one(struct amd_iommu *iommu)
+static int __init amd_iommu_init_one(struct amd_iommu *iommu, bool intr)
 {
     if ( allocate_cmd_buffer(iommu) == NULL )
         goto error_out;
@@ -1057,7 +1081,7 @@ static int __init amd_iommu_init_one(str
     if ( iommu->features.flds.ppr_sup && !allocate_ppr_log(iommu) )
         goto error_out;
 
-    if ( !set_iommu_interrupt_handler(iommu) )
+    if ( intr && !set_iommu_interrupt_handler(iommu) )
         goto error_out;
 
     /* To make sure that device_table.buffer has been successfully allocated */
@@ -1086,8 +1110,16 @@ static void __init amd_iommu_init_cleanu
     list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
     {
         list_del(&iommu->list);
+
+        iommu->ctrl.ga_en = 0;
+        iommu->ctrl.xt_en = 0;
+        iommu->ctrl.int_cap_xt_en = 0;
+
         if ( iommu->enabled )
             disable_iommu(iommu);
+        else if ( iommu->mmio_base )
+            writeq(iommu->ctrl.raw,
+                   iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
         deallocate_ring_buffer(&iommu->cmd_buffer);
         deallocate_ring_buffer(&iommu->event_log);
@@ -1289,7 +1321,7 @@ static int __init amd_iommu_prepare_one(
     return 0;
 }
 
-int __init amd_iommu_init(void)
+int __init amd_iommu_prepare(bool xt)
 {
     struct amd_iommu *iommu;
     int rc = -ENODEV;
@@ -1304,9 +1336,14 @@ int __init amd_iommu_init(void)
     if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
         goto error_out;
 
+    /* Have we been here before? */
+    if ( ivhd_type )
+        return 0;
+
     rc = amd_iommu_get_supported_ivhd_type();
     if ( rc < 0 )
         goto error_out;
+    BUG_ON(!rc);
     ivhd_type = rc;
 
     rc = amd_iommu_get_ivrs_dev_entries();
@@ -1322,9 +1359,37 @@ int __init amd_iommu_init(void)
         rc = amd_iommu_prepare_one(iommu);
         if ( rc )
             goto error_out;
+
+        rc = -ENODEV;
+        if ( xt && (!iommu->features.flds.ga_sup || !iommu->features.flds.xt_sup) )
+            goto error_out;
+    }
+
+    for_each_amd_iommu ( iommu )
+    {
+        /* NB: There's no need to actually write these out right here. */
+        iommu->ctrl.ga_en |= xt;
+        iommu->ctrl.xt_en = xt;
+        iommu->ctrl.int_cap_xt_en = xt;
     }
 
     rc = amd_iommu_update_ivrs_mapping_acpi();
+
+ error_out:
+    if ( rc )
+    {
+        amd_iommu_init_cleanup();
+        ivhd_type = 0;
+    }
+
+    return rc;
+}
+
+int __init amd_iommu_init(bool xt)
+{
+    struct amd_iommu *iommu;
+    int rc = amd_iommu_prepare(xt);
+
     if ( rc )
         goto error_out;
 
@@ -1350,7 +1415,12 @@ int __init amd_iommu_init(void)
     /* per iommu initialization  */
     for_each_amd_iommu ( iommu )
     {
-        rc = amd_iommu_init_one(iommu);
+        /*
+         * Setting up of the IOMMU interrupts cannot occur yet at the (very
+         * early) time we get here when enabling x2APIC mode. Suppress it
+         * here, and do it explicitly in amd_iommu_init_interrupt().
+         */
+        rc = amd_iommu_init_one(iommu, !xt);
         if ( rc )
             goto error_out;
     }
@@ -1362,6 +1432,40 @@ error_out:
     return rc;
 }
 
+int __init amd_iommu_init_interrupt(void)
+{
+    struct amd_iommu *iommu;
+    int rc = 0;
+
+    for_each_amd_iommu ( iommu )
+    {
+        struct irq_desc *desc;
+
+        if ( !set_iommu_interrupt_handler(iommu) )
+        {
+            rc = -EIO;
+            break;
+        }
+
+        desc = irq_to_desc(iommu->msi.irq);
+
+        spin_lock(&desc->lock);
+        ASSERT(iommu->ctrl.int_cap_xt_en);
+        set_x2apic_affinity(desc, &cpu_online_map);
+        spin_unlock(&desc->lock);
+
+        set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
+
+        if ( iommu->features.flds.ppr_sup )
+            set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    }
+
+    if ( rc )
+        amd_iommu_init_cleanup();
+
+    return rc;
+}
+
 static void invalidate_all_domain_pages(void)
 {
     struct domain *d;
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -799,6 +799,35 @@ void *__init amd_iommu_alloc_intremap_ta
     return tb;
 }
 
+bool __init iov_supports_xt(void)
+{
+    unsigned int apic;
+
+    if ( !iommu_enable || !iommu_intremap )
+        return false;
+
+    if ( amd_iommu_prepare(true) )
+        return false;
+
+    for ( apic = 0; apic < nr_ioapics; apic++ )
+    {
+        unsigned int idx = ioapic_id_to_index(IO_APIC_ID(apic));
+
+        if ( idx == MAX_IO_APICS )
+            return false;
+
+        if ( !find_iommu_for_device(ioapic_sbdf[idx].seg,
+                                    ioapic_sbdf[idx].bdf) )
+        {
+            AMD_IOMMU_DEBUG("No IOMMU for IO-APIC %#x (ID %x)\n",
+                            apic, IO_APIC_ID(apic));
+            return false;
+        }
+    }
+
+    return true;
+}
+
 int __init amd_setup_hpet_msi(struct msi_desc *msi_desc)
 {
     spinlock_t *lock;
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -170,7 +170,8 @@ static int __init iov_detect(void)
     if ( !iommu_enable && !iommu_intremap )
         return 0;
 
-    if ( amd_iommu_init() != 0 )
+    else if ( (init_done ? amd_iommu_init_interrupt()
+                         : amd_iommu_init(false)) != 0 )
     {
         printk("AMD-Vi: Error initialization\n");
         return -ENODEV;
@@ -184,6 +185,25 @@ static int __init iov_detect(void)
     return 0;
 }
 
+static int iov_enable_xt(void)
+{
+    int rc;
+
+    if ( system_state >= SYS_STATE_active )
+        return 0;
+
+    if ( (rc = amd_iommu_init(true)) != 0 )
+    {
+        printk("AMD-Vi: Error %d initializing for x2APIC mode\n", rc);
+        /* -ENXIO has special meaning to the caller - convert it. */
+        return rc != -ENXIO ? rc : -ENODATA;
+    }
+
+    init_done = true;
+
+    return 0;
+}
+
 int amd_iommu_alloc_root(struct domain_iommu *hd)
 {
     if ( unlikely(!hd->arch.root_table) )
@@ -557,11 +577,13 @@ static const struct iommu_ops __initcons
     .free_page_table = deallocate_page_table,
     .reassign_device = reassign_device,
     .get_device_group_id = amd_iommu_group_id,
+    .enable_x2apic = iov_enable_xt,
     .update_ire_from_apic = amd_iommu_ioapic_update_ire,
     .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
     .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
     .read_msi_from_ire = amd_iommu_read_msi_from_ire,
     .setup_hpet_msi = amd_setup_hpet_msi,
+    .adjust_irq_affinities = iov_adjust_irq_affinities,
     .suspend = amd_iommu_suspend,
     .resume = amd_iommu_resume,
     .crash_shutdown = amd_iommu_crash_shutdown,
@@ -571,4 +593,5 @@ static const struct iommu_ops __initcons
 static const struct iommu_init_ops __initconstrel _iommu_init_ops = {
     .ops = &_iommu_ops,
     .setup = iov_detect,
+    .supports_x2apic = iov_supports_xt,
 };
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -48,8 +48,11 @@ int amd_iommu_detect_acpi(void);
 void get_iommu_features(struct amd_iommu *iommu);
 
 /* amd-iommu-init functions */
-int amd_iommu_init(void);
+int amd_iommu_prepare(bool xt);
+int amd_iommu_init(bool xt);
+int amd_iommu_init_interrupt(void);
 int amd_iommu_update_ivrs_mapping_acpi(void);
+int iov_adjust_irq_affinities(void);
 
 /* mapping functions */
 int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
@@ -93,6 +96,7 @@ void amd_iommu_flush_all_caches(struct a
 struct amd_iommu *find_iommu_for_device(int seg, int bdf);
 
 /* interrupt remapping */
+bool iov_supports_xt(void);
 int amd_iommu_setup_ioapic_remapping(void);
 void *amd_iommu_alloc_intremap_table(
     const struct amd_iommu *, unsigned long **);
AMD/IOMMU: correct IRTE updating

Flushing didn't get done along the lines of what the specification says.
Mark entries to be updated as not remapped (which will result in
interrupt requests getting target aborted, but the interrupts should be
masked anyway at that point in time), issue the flush, and only then
write the new entry.

In update_intremap_entry_from_msi_msg() also fold the duplicate initial
lock determination and acquire into just a single instance.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
RFC: Putting the flush invocations in loops isn't overly nice, but I
     don't think this can really be abused, since callers up the stack
     hold further locks. Nevertheless I'd like to ask for better
     suggestions.
---
v4: Re-base.
v3: Remove stale parts of description. Re-base.
v2: Parts morphed into earlier patch.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -213,15 +213,13 @@ static void update_intremap_entry(const
             },
         };
 
-        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        ASSERT(!entry.ptr128->full.remap_en);
+        entry.ptr128->raw[1] = irte.raw[1];
         /*
-         * Low half, in particular RemapEn, needs to be cleared first.  See
+         * High half needs to be set before low one (containing RemapEn).  See
          * comment in free_intremap_entry() regarding the choice of barrier.
          */
         smp_wmb();
-        entry.ptr128->raw[1] = irte.raw[1];
-        /* High half needs to be set before low one (containing RemapEn). */
-        smp_wmb();
         ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
     }
     else
@@ -296,6 +294,20 @@ static int update_intremap_entry_from_io
     }
 
     entry = get_intremap_entry(iommu, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.ptr32->flds.remap_en )
+    {
+        entry.ptr32->flds.remap_en = false;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
     if ( fresh )
         /* nothing */;
     else if ( !lo_update )
@@ -325,13 +337,6 @@ static int update_intremap_entry_from_io
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled && !fresh )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
     set_rte_index(rte, offset);
 
     return 0;
@@ -587,19 +592,27 @@ static int update_intremap_entry_from_ms
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
 
+    lock = get_intremap_lock(iommu->seg, req_id);
+    spin_lock_irqsave(lock, flags);
+
     if ( msg == NULL )
     {
-        lock = get_intremap_lock(iommu->seg, req_id);
-        spin_lock_irqsave(lock, flags);
         for ( i = 0; i < nr; ++i )
             free_intremap_entry(iommu, req_id, *remap_index + i);
         spin_unlock_irqrestore(lock, flags);
-        goto done;
-    }
 
-    lock = get_intremap_lock(iommu->seg, req_id);
+        if ( iommu->enabled )
+        {
+            spin_lock_irqsave(&iommu->lock, flags);
+            amd_iommu_flush_intremap(iommu, req_id);
+            if ( alias_id != req_id )
+                amd_iommu_flush_intremap(iommu, alias_id);
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+
+        return 0;
+    }
 
-    spin_lock_irqsave(lock, flags);
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
@@ -623,6 +636,22 @@ static int update_intremap_entry_from_ms
     }
 
     entry = get_intremap_entry(iommu, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.ptr32->flds.remap_en )
+    {
+        entry.ptr32->flds.remap_en = false;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        if ( alias_id != req_id )
+            amd_iommu_flush_intremap(iommu, alias_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
     update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
@@ -642,16 +671,6 @@ static int update_intremap_entry_from_ms
                get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
-done:
-    if ( iommu->enabled )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        if ( alias_id != req_id )
-            amd_iommu_flush_intremap(iommu, alias_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
     return 0;
 }
 
AMD/IOMMU: don't needlessly log headers when dumping IRTs

Log SBDF headers only when there are actual IRTEs to log. This is
particularly important for the total volume of output when the ACPI
tables describe far more than just the existing devices. On my Rome
system so far there was one line for every function of every device on
all 256 buses of segment 0, with extremely few exceptions (like the
IOMMUs themselves).

Also only log one of the "per-device" or "shared" overall headers.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -883,7 +883,8 @@ int __init amd_setup_hpet_msi(struct msi
 }
 
 static void dump_intremap_table(const struct amd_iommu *iommu,
-                                union irte_cptr tbl)
+                                union irte_cptr tbl,
+                                const struct ivrs_mappings *ivrs_mapping)
 {
     unsigned int count;
 
@@ -892,19 +893,25 @@ static void dump_intremap_table(const st
 
     for ( count = 0; count < INTREMAP_ENTRIES; count++ )
     {
-        if ( iommu->ctrl.ga_en )
-        {
-            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
+        if ( iommu->ctrl.ga_en
+             ? !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1]
+             : !tbl.ptr32[count].raw )
                 continue;
+
+        if ( ivrs_mapping )
+        {
+            printk("  %04x:%02x:%02x:%u:\n", iommu->seg,
+                   PCI_BUS(ivrs_mapping->dte_requestor_id),
+                   PCI_SLOT(ivrs_mapping->dte_requestor_id),
+                   PCI_FUNC(ivrs_mapping->dte_requestor_id));
+            ivrs_mapping = NULL;
+        }
+
+        if ( iommu->ctrl.ga_en )
             printk("    IRTE[%03x] %016lx_%016lx\n",
                    count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
-        }
         else
-        {
-            if ( !tbl.ptr32[count].raw )
-                continue;
             printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
-        }
     }
 }
 
@@ -916,13 +923,8 @@ static int dump_intremap_mapping(const s
     if ( !ivrs_mapping )
         return 0;
 
-    printk("  %04x:%02x:%02x:%u:\n", iommu->seg,
-           PCI_BUS(ivrs_mapping->dte_requestor_id),
-           PCI_SLOT(ivrs_mapping->dte_requestor_id),
-           PCI_FUNC(ivrs_mapping->dte_requestor_id));
-
     spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
-    dump_intremap_table(iommu, ivrs_mapping->intremap_table);
+    dump_intremap_table(iommu, ivrs_mapping->intremap_table, ivrs_mapping);
     spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
 
     process_pending_softirqs();
@@ -932,17 +934,22 @@ static int dump_intremap_mapping(const s
 
 static void dump_intremap_tables(unsigned char key)
 {
-    unsigned long flags;
-
-    printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
+    if ( !shared_intremap_table )
+    {
+        printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
 
-    iterate_ivrs_entries(dump_intremap_mapping);
+        iterate_ivrs_entries(dump_intremap_mapping);
+    }
+    else
+    {
+        unsigned long flags;
 
-    printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
+        printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
 
-    spin_lock_irqsave(&shared_intremap_lock, flags);
-    dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
-                                         list),
-                        shared_intremap_table);
-    spin_unlock_irqrestore(&shared_intremap_lock, flags);
+        spin_lock_irqsave(&shared_intremap_lock, flags);
+        dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
+                                             list),
+                            shared_intremap_table, NULL);
+        spin_unlock_irqrestore(&shared_intremap_lock, flags);
+    }
 }
AMD/IOMMU: miscellaneous DTE handling adjustments

First and foremost switch boolean fields to bool. Adjust a few related
function parameters as well. Then
- in amd_iommu_set_intremap_table() don't use literal numbers,
- in iommu_dte_add_device_entry() use a compound literal instead of many
  assignments,
- in amd_iommu_setup_domain_device() eliminate a pointless local
  variable,
- in amd_iommu_disable_domain_device()
  - use || instead of && when deciding whether to clear an entry,
  - clear the I field without any checking of ATS / IOTLB state,
- leave reserved fields unnamed.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -69,8 +69,7 @@ union irte_cptr {
     const union irte128 *ptr128;
 } __transparent__;
 
-#define INTREMAP_LENGTH 0xB
-#define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
+#define INTREMAP_ENTRIES (1 << IOMMU_INTREMAP_LENGTH)
 
 struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
 struct hpet_sbdf hpet_sbdf;
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -101,51 +101,52 @@ static unsigned int set_iommu_pte_presen
 
 void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
                                    uint64_t root_ptr, uint16_t domain_id,
-                                   uint8_t paging_mode, uint8_t valid)
+                                   uint8_t paging_mode, bool valid)
 {
     dte->domain_id = domain_id;
     dte->pt_root = paddr_to_pfn(root_ptr);
-    dte->iw = 1;
-    dte->ir = 1;
+    dte->iw = true;
+    dte->ir = true;
     dte->paging_mode = paging_mode;
-    dte->tv = 1;
+    dte->tv = true;
     dte->v = valid;
 }
 
 void __init amd_iommu_set_intremap_table(
-    struct amd_iommu_dte *dte, uint64_t intremap_ptr, uint8_t int_valid)
+    struct amd_iommu_dte *dte, uint64_t intremap_ptr, bool valid)
 {
     dte->it_root = intremap_ptr >> 6;
-    dte->int_tab_len = 0xb; /* 2048 entries */
-    dte->int_ctl = 2; /* fixed and arbitrated interrupts remapped */
-    dte->ig = 0; /* unmapped interrupt results io page faults */
-    dte->iv = int_valid;
+    dte->int_tab_len = IOMMU_INTREMAP_LENGTH;
+    dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
+    dte->ig = false; /* unmapped interrupts result in i/o page faults */
+    dte->iv = valid;
 }
 
 void __init iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
-                                       struct ivrs_mappings *ivrs_dev)
+                                       const struct ivrs_mappings *ivrs_dev)
 {
     uint8_t flags = ivrs_dev->device_flags;
 
-    memset(dte, 0, sizeof(*dte));
-
-    dte->init_pass = MASK_EXTR(flags, ACPI_IVHD_INIT_PASS);
-    dte->ext_int_pass = MASK_EXTR(flags, ACPI_IVHD_EINT_PASS);
-    dte->nmi_pass = MASK_EXTR(flags, ACPI_IVHD_NMI_PASS);
-    dte->lint0_pass = MASK_EXTR(flags, ACPI_IVHD_LINT0_PASS);
-    dte->lint1_pass = MASK_EXTR(flags, ACPI_IVHD_LINT1_PASS);
-    dte->sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT);
-    dte->ex = ivrs_dev->dte_allow_exclusion;
+    *dte = (struct amd_iommu_dte){
+        .init_pass = flags & ACPI_IVHD_INIT_PASS,
+        .ext_int_pass = flags & ACPI_IVHD_EINT_PASS,
+        .nmi_pass = flags & ACPI_IVHD_NMI_PASS,
+        .lint0_pass = flags & ACPI_IVHD_LINT0_PASS,
+        .lint1_pass = flags & ACPI_IVHD_LINT1_PASS,
+        .ioctl = IOMMU_DEV_TABLE_IO_CONTROL_ABORTED,
+        .sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT),
+        .ex = ivrs_dev->dte_allow_exclusion,
+    };
 }
 
 void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
-                             uint64_t gcr3_mfn, uint8_t gv, uint8_t glx)
+                             uint64_t gcr3_mfn, bool gv, uint8_t glx)
 {
 #define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
 #define GCR3_SHIFT(lo) ((lo) - PAGE_SHIFT)
 
     /* I bit must be set when gcr3 is enabled */
-    dte->i = 1;
+    dte->i = true;
 
     dte->gcr3_trp_14_12 = (gcr3_mfn & GCR3_MASK(14, 12)) >> GCR3_SHIFT(12);
     dte->gcr3_trp_30_15 = (gcr3_mfn & GCR3_MASK(30, 15)) >> GCR3_SHIFT(15);
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -93,7 +93,6 @@ static void amd_iommu_setup_domain_devic
     struct amd_iommu_dte *table, *dte;
     unsigned long flags;
     int req_id, valid = 1;
-    int dte_i = 0;
     u8 bus = pdev->bus;
     const struct domain_iommu *hd = dom_iommu(domain);
 
@@ -103,9 +102,6 @@ static void amd_iommu_setup_domain_devic
     if ( iommu_hwdom_passthrough && is_hardware_domain(domain) )
         valid = 0;
 
-    if ( ats_enabled )
-        dte_i = 1;
-
     /* get device-table entry */
     req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
     table = iommu->dev_table.buffer;
@@ -122,7 +118,7 @@ static void amd_iommu_setup_domain_devic
 
         if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
              iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
-            dte->i = dte_i;
+            dte->i = ats_enabled;
 
         amd_iommu_flush_device(iommu, req_id);
 
@@ -288,14 +284,11 @@ void amd_iommu_disable_domain_device(str
     dte = &table[req_id];
 
     spin_lock_irqsave(&iommu->lock, flags);
-    if ( dte->tv && dte->v )
+    if ( dte->tv || dte->v )
     {
-        dte->tv = 0;
-        dte->v = 0;
-
-        if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
-             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
-            dte->i = 0;
+        dte->tv = false;
+        dte->v = false;
+        dte->i = false;
 
         amd_iommu_flush_device(iommu, req_id);
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -107,57 +107,60 @@
 #define IOMMU_DEV_TABLE_INT_CONTROL_FORWARDED	0x1
 #define IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED	0x2
 
+/* For now we always allocate maximum possible interrupt remapping tables. */
+#define IOMMU_INTREMAP_LENGTH			0xB
+
 struct amd_iommu_dte {
     /* 0 - 63 */
-    uint64_t v:1;
-    uint64_t tv:1;
-    uint64_t reserved0:5;
-    uint64_t had:2;
-    uint64_t paging_mode:3;
+    bool v:1;
+    bool tv:1;
+    unsigned int :5;
+    unsigned int had:2;
+    unsigned int paging_mode:3;
     uint64_t pt_root:40;
-    uint64_t ppr:1;
-    uint64_t gprp:1;
-    uint64_t giov:1;
-    uint64_t gv:1;
-    uint64_t glx:2;
-    uint64_t gcr3_trp_14_12:3;
-    uint64_t ir:1;
-    uint64_t iw:1;
-    uint64_t reserved1:1;
+    bool ppr:1;
+    bool gprp:1;
+    bool giov:1;
+    bool gv:1;
+    unsigned int glx:2;
+    unsigned int gcr3_trp_14_12:3;
+    bool ir:1;
+    bool iw:1;
+    unsigned int :1;
 
     /* 64 - 127 */
-    uint64_t domain_id:16;
-    uint64_t gcr3_trp_30_15:16;
-    uint64_t i:1;
-    uint64_t se:1;
-    uint64_t sa:1;
-    uint64_t ioctl:2;
-    uint64_t cache:1;
-    uint64_t sd:1;
-    uint64_t ex:1;
-    uint64_t sys_mgt:2;
-    uint64_t reserved2:1;
-    uint64_t gcr3_trp_51_31:21;
+    unsigned int domain_id:16;
+    unsigned int gcr3_trp_30_15:16;
+    bool i:1;
+    bool se:1;
+    bool sa:1;
+    unsigned int ioctl:2;
+    bool cache:1;
+    bool sd:1;
+    bool ex:1;
+    unsigned int sys_mgt:2;
+    unsigned int :1;
+    unsigned int gcr3_trp_51_31:21;
 
     /* 128 - 191 */
-    uint64_t iv:1;
-    uint64_t int_tab_len:4;
-    uint64_t ig:1;
+    bool iv:1;
+    unsigned int int_tab_len:4;
+    bool ig:1;
     uint64_t it_root:46;
-    uint64_t reserved3:4;
-    uint64_t init_pass:1;
-    uint64_t ext_int_pass:1;
-    uint64_t nmi_pass:1;
-    uint64_t reserved4:1;
-    uint64_t int_ctl:2;
-    uint64_t lint0_pass:1;
-    uint64_t lint1_pass:1;
+    unsigned int :4;
+    bool init_pass:1;
+    bool ext_int_pass:1;
+    bool nmi_pass:1;
+    unsigned int :1;
+    unsigned int int_ctl:2;
+    bool lint0_pass:1;
+    bool lint1_pass:1;
 
     /* 192 - 255 */
-    uint64_t reserved5:54;
-    uint64_t attr_v:1;
-    uint64_t mode0_fc:1;
-    uint64_t snoop_attr:8;
+    uint64_t :54;
+    bool attr_v:1;
+    bool mode0_fc:1;
+    unsigned int snoop_attr:8;
 };
 
 /* Command Buffer */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -73,14 +73,14 @@ int __must_check amd_iommu_flush_iotlb_a
 int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
 void amd_iommu_set_intremap_table(struct amd_iommu_dte *dte,
                                   uint64_t intremap_ptr,
-                                  uint8_t int_valid);
+                                  bool valid);
 void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
 				   uint64_t root_ptr, uint16_t domain_id,
-				   uint8_t paging_mode, uint8_t valid);
+				   uint8_t paging_mode, bool valid);
 void iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
-                                struct ivrs_mappings *ivrs_dev);
+                                const struct ivrs_mappings *ivrs_dev);
 void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
-                             uint64_t gcr3_mfn, uint8_t gv, uint8_t glx);
+                             uint64_t gcr3_mfn, bool gv, uint8_t glx);
 
 /* send cmd to iommu */
 void amd_iommu_flush_all_pages(struct domain *d);
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 01/12] AMD/IOMMU: use bit field for extended feature register
Posted by Jan Beulich 4 years, 8 months ago
This also takes care of several of the shift values wrongly having been
specified as hex rather than dec.

Take the opportunity and
- replace a readl() pair by a single readq(),
- add further fields.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v4: Drop stray/leftover #undef.
v3: Another attempt at deriving masks from bitfields, hopefully better
     liked by clang (mine was fine even with the v2 variant).
v2: Correct sats_sup position and name. Re-base over new earlier patch.

--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -60,49 +60,76 @@ static int __init get_iommu_capabilities
  
  void __init get_iommu_features(struct amd_iommu *iommu)
  {
-    u32 low, high;
-    int i = 0 ;
      const struct amd_iommu *first;
-    static const char *__initdata feature_str[] = {
-        "- Prefetch Pages Command",
-        "- Peripheral Page Service Request",
-        "- X2APIC Supported",
-        "- NX bit Supported",
-        "- Guest Translation",
-        "- Reserved bit [5]",
-        "- Invalidate All Command",
-        "- Guest APIC supported",
-        "- Hardware Error Registers",
-        "- Performance Counters",
-        NULL
-    };
-
      ASSERT( iommu->mmio_base );
  
      if ( !iommu_has_cap(iommu, PCI_CAP_EFRSUP_SHIFT) )
      {
-        iommu->features = 0;
+        iommu->features.raw = 0;
          return;
      }
  
-    low = readl(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET);
-    high = readl(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET + 4);
-
-    iommu->features = ((u64)high << 32) | low;
+    iommu->features.raw =
+        readq(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET);
  
      /* Don't log the same set of features over and over. */
      first = list_first_entry(&amd_iommu_head, struct amd_iommu, list);
-    if ( iommu != first && iommu->features == first->features )
+    if ( iommu != first && iommu->features.raw == first->features.raw )
          return;
  
      printk("AMD-Vi: IOMMU Extended Features:\n");
  
-    while ( feature_str[i] )
+#define FEAT(fld, str) do {                                    \
+    if ( --((union amd_iommu_ext_features){}).flds.fld > 1 )   \
+        printk( "- " str ": %#x\n", iommu->features.flds.fld); \
+    else if ( iommu->features.flds.fld )                       \
+        printk( "- " str "\n");                                \
+} while ( false )
+
+    FEAT(pref_sup,           "Prefetch Pages Command");
+    FEAT(ppr_sup,            "Peripheral Page Service Request");
+    FEAT(xt_sup,             "x2APIC");
+    FEAT(nx_sup,             "NX bit");
+    FEAT(gappi_sup,          "Guest APIC Physical Processor Interrupt");
+    FEAT(ia_sup,             "Invalidate All Command");
+    FEAT(ga_sup,             "Guest APIC");
+    FEAT(he_sup,             "Hardware Error Registers");
+    FEAT(pc_sup,             "Performance Counters");
+    FEAT(hats,               "Host Address Translation Size");
+
+    if ( iommu->features.flds.gt_sup )
      {
-        if ( amd_iommu_has_feature(iommu, i) )
-            printk( " %s\n", feature_str[i]);
-        i++;
+        FEAT(gats,           "Guest Address Translation Size");
+        FEAT(glx_sup,        "Guest CR3 Root Table Level");
+        FEAT(pas_max,        "Maximum PASID");
      }
+
+    FEAT(smif_sup,           "SMI Filter Register");
+    FEAT(smif_rc,            "SMI Filter Register Count");
+    FEAT(gam_sup,            "Guest Virtual APIC Modes");
+    FEAT(dual_ppr_log_sup,   "Dual PPR Log");
+    FEAT(dual_event_log_sup, "Dual Event Log");
+    FEAT(sats_sup,           "Secure ATS");
+    FEAT(us_sup,             "User / Supervisor Page Protection");
+    FEAT(dev_tbl_seg_sup,    "Device Table Segmentation");
+    FEAT(ppr_early_of_sup,   "PPR Log Overflow Early Warning");
+    FEAT(ppr_auto_rsp_sup,   "PPR Automatic Response");
+    FEAT(marc_sup,           "Memory Access Routing and Control");
+    FEAT(blk_stop_mrk_sup,   "Block StopMark Message");
+    FEAT(perf_opt_sup ,      "Performance Optimization");
+    FEAT(msi_cap_mmio_sup,   "MSI Capability MMIO Access");
+    FEAT(gio_sup,            "Guest I/O Protection");
+    FEAT(ha_sup,             "Host Access");
+    FEAT(eph_sup,            "Enhanced PPR Handling");
+    FEAT(attr_fw_sup,        "Attribute Forward");
+    FEAT(hd_sup,             "Host Dirty");
+    FEAT(inv_iotlb_type_sup, "Invalidate IOTLB Type");
+    FEAT(viommu_sup,         "Virtualized IOMMU");
+    FEAT(vm_guard_io_sup,    "VMGuard I/O Support");
+    FEAT(vm_table_size,      "VM Table Size");
+    FEAT(ga_update_dis_sup,  "Guest Access Bit Update Disable");
+
+#undef FEAT
  }
  
  int __init amd_iommu_detect_one_acpi(
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -638,7 +638,7 @@ static uint64_t iommu_mmio_read64(struct
          val = reg_to_u64(iommu->reg_status);
          break;
      case IOMMU_EXT_FEATURE_MMIO_OFFSET:
-        val = reg_to_u64(iommu->reg_ext_feature);
+        val = iommu->reg_ext_feature.raw;
          break;
  
      default:
@@ -802,39 +802,26 @@ int guest_iommu_set_base(struct domain *
  /* Initialize mmio read only bits */
  static void guest_iommu_reg_init(struct guest_iommu *iommu)
  {
-    uint32_t lower, upper;
+    union amd_iommu_ext_features ef = {
+        /* Support prefetch */
+        .flds.pref_sup = 1,
+        /* Support PPR log */
+        .flds.ppr_sup = 1,
+        /* Support guest translation */
+        .flds.gt_sup = 1,
+        /* Support invalidate all command */
+        .flds.ia_sup = 1,
+        /* Host translation size has 6 levels */
+        .flds.hats = HOST_ADDRESS_SIZE_6_LEVEL,
+        /* Guest translation size has 6 levels */
+        .flds.gats = GUEST_ADDRESS_SIZE_6_LEVEL,
+        /* Single level gCR3 */
+        .flds.glx_sup = GUEST_CR3_1_LEVEL,
+        /* 9 bit PASID */
+        .flds.pas_max = PASMAX_9_bit,
+    };
  
-    lower = upper = 0;
-    /* Support prefetch */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PREFSUP_SHIFT);
-    /* Support PPR log */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PPRSUP_SHIFT);
-    /* Support guest translation */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_GTSUP_SHIFT);
-    /* Support invalidate all command */
-    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_IASUP_SHIFT);
-
-    /* Host translation size has 6 levels */
-    set_field_in_reg_u32(HOST_ADDRESS_SIZE_6_LEVEL, lower,
-                         IOMMU_EXT_FEATURE_HATS_MASK,
-                         IOMMU_EXT_FEATURE_HATS_SHIFT,
-                         &lower);
-    /* Guest translation size has 6 levels */
-    set_field_in_reg_u32(GUEST_ADDRESS_SIZE_6_LEVEL, lower,
-                         IOMMU_EXT_FEATURE_GATS_MASK,
-                         IOMMU_EXT_FEATURE_GATS_SHIFT,
-                         &lower);
-    /* Single level gCR3 */
-    set_field_in_reg_u32(GUEST_CR3_1_LEVEL, lower,
-                         IOMMU_EXT_FEATURE_GLXSUP_MASK,
-                         IOMMU_EXT_FEATURE_GLXSUP_SHIFT, &lower);
-    /* 9 bit PASID */
-    set_field_in_reg_u32(PASMAX_9_bit, upper,
-                         IOMMU_EXT_FEATURE_PASMAX_MASK,
-                         IOMMU_EXT_FEATURE_PASMAX_SHIFT, &upper);
-
-    iommu->reg_ext_feature.lo = lower;
-    iommu->reg_ext_feature.hi = upper;
+    iommu->reg_ext_feature = ef;
  }
  
  static int guest_iommu_mmio_range(struct vcpu *v, unsigned long addr)
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -882,7 +882,7 @@ static void enable_iommu(struct amd_iomm
      register_iommu_event_log_in_mmio_space(iommu);
      register_iommu_exclusion_range(iommu);
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
+    if ( iommu->features.flds.ppr_sup )
          register_iommu_ppr_log_in_mmio_space(iommu);
  
      desc = irq_to_desc(iommu->msi.irq);
@@ -896,15 +896,15 @@ static void enable_iommu(struct amd_iomm
      set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
      set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
+    if ( iommu->features.flds.ppr_sup )
          set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) )
+    if ( iommu->features.flds.gt_sup )
          set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED);
  
      set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) )
+    if ( iommu->features.flds.ia_sup )
          amd_iommu_flush_all_caches(iommu);
  
      iommu->enabled = 1;
@@ -927,10 +927,10 @@ static void disable_iommu(struct amd_iom
      set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED);
      set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED);
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
+    if ( iommu->features.flds.ppr_sup )
          set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED);
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) )
+    if ( iommu->features.flds.gt_sup )
          set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_DISABLED);
  
      set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED);
@@ -1026,7 +1026,7 @@ static int __init amd_iommu_init_one(str
  
      get_iommu_features(iommu);
  
-    if ( iommu->features )
+    if ( iommu->features.raw )
          iommuv2_enabled = 1;
  
      if ( allocate_cmd_buffer(iommu) == NULL )
@@ -1035,9 +1035,8 @@ static int __init amd_iommu_init_one(str
      if ( allocate_event_log(iommu) == NULL )
          goto error_out;
  
-    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
-        if ( allocate_ppr_log(iommu) == NULL )
-            goto error_out;
+    if ( iommu->features.flds.ppr_sup && !allocate_ppr_log(iommu) )
+        goto error_out;
  
      if ( !set_iommu_interrupt_handler(iommu) )
          goto error_out;
@@ -1393,7 +1392,7 @@ void amd_iommu_resume(void)
      }
  
      /* flush all cache entries after iommu re-enabled */
-    if ( !amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) )
+    if ( !iommu->features.flds.ia_sup )
      {
          invalidate_all_devices();
          invalidate_all_domain_pages();
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -83,7 +83,7 @@ struct amd_iommu {
      iommu_cap_t cap;
  
      u8 ht_flags;
-    u64 features;
+    union amd_iommu_ext_features features;
  
      void *mmio_base;
      unsigned long mmio_base_phys;
@@ -175,7 +175,7 @@ struct guest_iommu {
      /* MMIO regs */
      struct mmio_reg         reg_ctrl;              /* MMIO offset 0018h */
      struct mmio_reg         reg_status;            /* MMIO offset 2020h */
-    struct mmio_reg         reg_ext_feature;       /* MMIO offset 0030h */
+    union amd_iommu_ext_features reg_ext_feature;  /* MMIO offset 0030h */
  
      /* guest interrupt settings */
      struct guest_iommu_msi  msi;
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -346,26 +346,57 @@ struct amd_iommu_dte {
  #define IOMMU_EXCLUSION_LIMIT_HIGH_MASK		0xFFFFFFFF
  #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT	0
  
-/* Extended Feature Register*/
+/* Extended Feature Register */
  #define IOMMU_EXT_FEATURE_MMIO_OFFSET                   0x30
-#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT                 0x0
-#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT                  0x1
-#define IOMMU_EXT_FEATURE_XTSUP_SHIFT                   0x2
-#define IOMMU_EXT_FEATURE_NXSUP_SHIFT                   0x3
-#define IOMMU_EXT_FEATURE_GTSUP_SHIFT                   0x4
-#define IOMMU_EXT_FEATURE_IASUP_SHIFT                   0x6
-#define IOMMU_EXT_FEATURE_GASUP_SHIFT                   0x7
-#define IOMMU_EXT_FEATURE_HESUP_SHIFT                   0x8
-#define IOMMU_EXT_FEATURE_PCSUP_SHIFT                   0x9
-#define IOMMU_EXT_FEATURE_HATS_SHIFT                    0x10
-#define IOMMU_EXT_FEATURE_HATS_MASK                     0x00000C00
-#define IOMMU_EXT_FEATURE_GATS_SHIFT                    0x12
-#define IOMMU_EXT_FEATURE_GATS_MASK                     0x00003000
-#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT                  0x14
-#define IOMMU_EXT_FEATURE_GLXSUP_MASK                   0x0000C000
  
-#define IOMMU_EXT_FEATURE_PASMAX_SHIFT                  0x0
-#define IOMMU_EXT_FEATURE_PASMAX_MASK                   0x0000001F
+union amd_iommu_ext_features {
+    uint64_t raw;
+    struct {
+        unsigned int pref_sup:1;
+        unsigned int ppr_sup:1;
+        unsigned int xt_sup:1;
+        unsigned int nx_sup:1;
+        unsigned int gt_sup:1;
+        unsigned int gappi_sup:1;
+        unsigned int ia_sup:1;
+        unsigned int ga_sup:1;
+        unsigned int he_sup:1;
+        unsigned int pc_sup:1;
+        unsigned int hats:2;
+        unsigned int gats:2;
+        unsigned int glx_sup:2;
+        unsigned int smif_sup:2;
+        unsigned int smif_rc:3;
+        unsigned int gam_sup:3;
+        unsigned int dual_ppr_log_sup:2;
+        unsigned int :2;
+        unsigned int dual_event_log_sup:2;
+        unsigned int :1;
+        unsigned int sats_sup:1;
+        unsigned int pas_max:5;
+        unsigned int us_sup:1;
+        unsigned int dev_tbl_seg_sup:2;
+        unsigned int ppr_early_of_sup:1;
+        unsigned int ppr_auto_rsp_sup:1;
+        unsigned int marc_sup:2;
+        unsigned int blk_stop_mrk_sup:1;
+        unsigned int perf_opt_sup:1;
+        unsigned int msi_cap_mmio_sup:1;
+        unsigned int :1;
+        unsigned int gio_sup:1;
+        unsigned int ha_sup:1;
+        unsigned int eph_sup:1;
+        unsigned int attr_fw_sup:1;
+        unsigned int hd_sup:1;
+        unsigned int :1;
+        unsigned int inv_iotlb_type_sup:1;
+        unsigned int viommu_sup:1;
+        unsigned int vm_guard_io_sup:1;
+        unsigned int vm_table_size:4;
+        unsigned int ga_update_dis_sup:1;
+        unsigned int :2;
+    } flds;
+};
  
  /* Status Register*/
  #define IOMMU_STATUS_MMIO_OFFSET		0x2020
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -218,13 +218,6 @@ static inline int iommu_has_cap(struct a
      return !!(iommu->cap.header & (1u << bit));
  }
  
-static inline int amd_iommu_has_feature(struct amd_iommu *iommu, uint32_t bit)
-{
-    if ( !iommu_has_cap(iommu, PCI_CAP_EFRSUP_SHIFT) )
-        return 0;
-    return !!(iommu->features & (1U << bit));
-}
-
  /* access tail or head pointer of ring buffer */
  static inline uint32_t iommu_get_rb_pointer(uint32_t reg)
  {

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 01/12] AMD/IOMMU: use bit field for extended feature register
Posted by Woods, Brian 4 years, 8 months ago
On Thu, Jul 25, 2019 at 01:29:16PM +0000, Jan Beulich wrote:
> This also takes care of several of the shift values wrongly having been
> specified as hex rather than dec.
> 
> Take the opportunity and
> - replace a readl() pair by a single readq(),
> - add further fields.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Brian Woods <brian.woods@amd.com>

> ---
> v4: Drop stray/leftover #undef.
> v3: Another attempt at deriving masks from bitfields, hopefully better
>      liked by clang (mine was fine even with the v2 variant).
> v2: Correct sats_sup position and name. Re-base over new earlier patch.
> 
> --- a/xen/drivers/passthrough/amd/iommu_detect.c
> +++ b/xen/drivers/passthrough/amd/iommu_detect.c
> @@ -60,49 +60,76 @@ static int __init get_iommu_capabilities
>   
>   void __init get_iommu_features(struct amd_iommu *iommu)
>   {
> -    u32 low, high;
> -    int i = 0 ;
>       const struct amd_iommu *first;
> -    static const char *__initdata feature_str[] = {
> -        "- Prefetch Pages Command",
> -        "- Peripheral Page Service Request",
> -        "- X2APIC Supported",
> -        "- NX bit Supported",
> -        "- Guest Translation",
> -        "- Reserved bit [5]",
> -        "- Invalidate All Command",
> -        "- Guest APIC supported",
> -        "- Hardware Error Registers",
> -        "- Performance Counters",
> -        NULL
> -    };
> -
>       ASSERT( iommu->mmio_base );
>   
>       if ( !iommu_has_cap(iommu, PCI_CAP_EFRSUP_SHIFT) )
>       {
> -        iommu->features = 0;
> +        iommu->features.raw = 0;
>           return;
>       }
>   
> -    low = readl(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET);
> -    high = readl(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET + 4);
> -
> -    iommu->features = ((u64)high << 32) | low;
> +    iommu->features.raw =
> +        readq(iommu->mmio_base + IOMMU_EXT_FEATURE_MMIO_OFFSET);
>   
>       /* Don't log the same set of features over and over. */
>       first = list_first_entry(&amd_iommu_head, struct amd_iommu, list);
> -    if ( iommu != first && iommu->features == first->features )
> +    if ( iommu != first && iommu->features.raw == first->features.raw )
>           return;
>   
>       printk("AMD-Vi: IOMMU Extended Features:\n");
>   
> -    while ( feature_str[i] )
> +#define FEAT(fld, str) do {                                    \
> +    if ( --((union amd_iommu_ext_features){}).flds.fld > 1 )   \
> +        printk( "- " str ": %#x\n", iommu->features.flds.fld); \
> +    else if ( iommu->features.flds.fld )                       \
> +        printk( "- " str "\n");                                \
> +} while ( false )
> +
> +    FEAT(pref_sup,           "Prefetch Pages Command");
> +    FEAT(ppr_sup,            "Peripheral Page Service Request");
> +    FEAT(xt_sup,             "x2APIC");
> +    FEAT(nx_sup,             "NX bit");
> +    FEAT(gappi_sup,          "Guest APIC Physical Processor Interrupt");
> +    FEAT(ia_sup,             "Invalidate All Command");
> +    FEAT(ga_sup,             "Guest APIC");
> +    FEAT(he_sup,             "Hardware Error Registers");
> +    FEAT(pc_sup,             "Performance Counters");
> +    FEAT(hats,               "Host Address Translation Size");
> +
> +    if ( iommu->features.flds.gt_sup )
>       {
> -        if ( amd_iommu_has_feature(iommu, i) )
> -            printk( " %s\n", feature_str[i]);
> -        i++;
> +        FEAT(gats,           "Guest Address Translation Size");
> +        FEAT(glx_sup,        "Guest CR3 Root Table Level");
> +        FEAT(pas_max,        "Maximum PASID");
>       }
> +
> +    FEAT(smif_sup,           "SMI Filter Register");
> +    FEAT(smif_rc,            "SMI Filter Register Count");
> +    FEAT(gam_sup,            "Guest Virtual APIC Modes");
> +    FEAT(dual_ppr_log_sup,   "Dual PPR Log");
> +    FEAT(dual_event_log_sup, "Dual Event Log");
> +    FEAT(sats_sup,           "Secure ATS");
> +    FEAT(us_sup,             "User / Supervisor Page Protection");
> +    FEAT(dev_tbl_seg_sup,    "Device Table Segmentation");
> +    FEAT(ppr_early_of_sup,   "PPR Log Overflow Early Warning");
> +    FEAT(ppr_auto_rsp_sup,   "PPR Automatic Response");
> +    FEAT(marc_sup,           "Memory Access Routing and Control");
> +    FEAT(blk_stop_mrk_sup,   "Block StopMark Message");
> +    FEAT(perf_opt_sup ,      "Performance Optimization");
> +    FEAT(msi_cap_mmio_sup,   "MSI Capability MMIO Access");
> +    FEAT(gio_sup,            "Guest I/O Protection");
> +    FEAT(ha_sup,             "Host Access");
> +    FEAT(eph_sup,            "Enhanced PPR Handling");
> +    FEAT(attr_fw_sup,        "Attribute Forward");
> +    FEAT(hd_sup,             "Host Dirty");
> +    FEAT(inv_iotlb_type_sup, "Invalidate IOTLB Type");
> +    FEAT(viommu_sup,         "Virtualized IOMMU");
> +    FEAT(vm_guard_io_sup,    "VMGuard I/O Support");
> +    FEAT(vm_table_size,      "VM Table Size");
> +    FEAT(ga_update_dis_sup,  "Guest Access Bit Update Disable");
> +
> +#undef FEAT
>   }
>   
>   int __init amd_iommu_detect_one_acpi(
> --- a/xen/drivers/passthrough/amd/iommu_guest.c
> +++ b/xen/drivers/passthrough/amd/iommu_guest.c
> @@ -638,7 +638,7 @@ static uint64_t iommu_mmio_read64(struct
>           val = reg_to_u64(iommu->reg_status);
>           break;
>       case IOMMU_EXT_FEATURE_MMIO_OFFSET:
> -        val = reg_to_u64(iommu->reg_ext_feature);
> +        val = iommu->reg_ext_feature.raw;
>           break;
>   
>       default:
> @@ -802,39 +802,26 @@ int guest_iommu_set_base(struct domain *
>   /* Initialize mmio read only bits */
>   static void guest_iommu_reg_init(struct guest_iommu *iommu)
>   {
> -    uint32_t lower, upper;
> +    union amd_iommu_ext_features ef = {
> +        /* Support prefetch */
> +        .flds.pref_sup = 1,
> +        /* Support PPR log */
> +        .flds.ppr_sup = 1,
> +        /* Support guest translation */
> +        .flds.gt_sup = 1,
> +        /* Support invalidate all command */
> +        .flds.ia_sup = 1,
> +        /* Host translation size has 6 levels */
> +        .flds.hats = HOST_ADDRESS_SIZE_6_LEVEL,
> +        /* Guest translation size has 6 levels */
> +        .flds.gats = GUEST_ADDRESS_SIZE_6_LEVEL,
> +        /* Single level gCR3 */
> +        .flds.glx_sup = GUEST_CR3_1_LEVEL,
> +        /* 9 bit PASID */
> +        .flds.pas_max = PASMAX_9_bit,
> +    };
>   
> -    lower = upper = 0;
> -    /* Support prefetch */
> -    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PREFSUP_SHIFT);
> -    /* Support PPR log */
> -    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PPRSUP_SHIFT);
> -    /* Support guest translation */
> -    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_GTSUP_SHIFT);
> -    /* Support invalidate all command */
> -    iommu_set_bit(&lower,IOMMU_EXT_FEATURE_IASUP_SHIFT);
> -
> -    /* Host translation size has 6 levels */
> -    set_field_in_reg_u32(HOST_ADDRESS_SIZE_6_LEVEL, lower,
> -                         IOMMU_EXT_FEATURE_HATS_MASK,
> -                         IOMMU_EXT_FEATURE_HATS_SHIFT,
> -                         &lower);
> -    /* Guest translation size has 6 levels */
> -    set_field_in_reg_u32(GUEST_ADDRESS_SIZE_6_LEVEL, lower,
> -                         IOMMU_EXT_FEATURE_GATS_MASK,
> -                         IOMMU_EXT_FEATURE_GATS_SHIFT,
> -                         &lower);
> -    /* Single level gCR3 */
> -    set_field_in_reg_u32(GUEST_CR3_1_LEVEL, lower,
> -                         IOMMU_EXT_FEATURE_GLXSUP_MASK,
> -                         IOMMU_EXT_FEATURE_GLXSUP_SHIFT, &lower);
> -    /* 9 bit PASID */
> -    set_field_in_reg_u32(PASMAX_9_bit, upper,
> -                         IOMMU_EXT_FEATURE_PASMAX_MASK,
> -                         IOMMU_EXT_FEATURE_PASMAX_SHIFT, &upper);
> -
> -    iommu->reg_ext_feature.lo = lower;
> -    iommu->reg_ext_feature.hi = upper;
> +    iommu->reg_ext_feature = ef;
>   }
>   
>   static int guest_iommu_mmio_range(struct vcpu *v, unsigned long addr)
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -882,7 +882,7 @@ static void enable_iommu(struct amd_iomm
>       register_iommu_event_log_in_mmio_space(iommu);
>       register_iommu_exclusion_range(iommu);
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
> +    if ( iommu->features.flds.ppr_sup )
>           register_iommu_ppr_log_in_mmio_space(iommu);
>   
>       desc = irq_to_desc(iommu->msi.irq);
> @@ -896,15 +896,15 @@ static void enable_iommu(struct amd_iomm
>       set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
>       set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
> +    if ( iommu->features.flds.ppr_sup )
>           set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) )
> +    if ( iommu->features.flds.gt_sup )
>           set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED);
>   
>       set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) )
> +    if ( iommu->features.flds.ia_sup )
>           amd_iommu_flush_all_caches(iommu);
>   
>       iommu->enabled = 1;
> @@ -927,10 +927,10 @@ static void disable_iommu(struct amd_iom
>       set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED);
>       set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED);
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
> +    if ( iommu->features.flds.ppr_sup )
>           set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED);
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) )
> +    if ( iommu->features.flds.gt_sup )
>           set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_DISABLED);
>   
>       set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED);
> @@ -1026,7 +1026,7 @@ static int __init amd_iommu_init_one(str
>   
>       get_iommu_features(iommu);
>   
> -    if ( iommu->features )
> +    if ( iommu->features.raw )
>           iommuv2_enabled = 1;
>   
>       if ( allocate_cmd_buffer(iommu) == NULL )
> @@ -1035,9 +1035,8 @@ static int __init amd_iommu_init_one(str
>       if ( allocate_event_log(iommu) == NULL )
>           goto error_out;
>   
> -    if ( amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
> -        if ( allocate_ppr_log(iommu) == NULL )
> -            goto error_out;
> +    if ( iommu->features.flds.ppr_sup && !allocate_ppr_log(iommu) )
> +        goto error_out;
>   
>       if ( !set_iommu_interrupt_handler(iommu) )
>           goto error_out;
> @@ -1393,7 +1392,7 @@ void amd_iommu_resume(void)
>       }
>   
>       /* flush all cache entries after iommu re-enabled */
> -    if ( !amd_iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) )
> +    if ( !iommu->features.flds.ia_sup )
>       {
>           invalidate_all_devices();
>           invalidate_all_domain_pages();
> --- a/xen/include/asm-x86/amd-iommu.h
> +++ b/xen/include/asm-x86/amd-iommu.h
> @@ -83,7 +83,7 @@ struct amd_iommu {
>       iommu_cap_t cap;
>   
>       u8 ht_flags;
> -    u64 features;
> +    union amd_iommu_ext_features features;
>   
>       void *mmio_base;
>       unsigned long mmio_base_phys;
> @@ -175,7 +175,7 @@ struct guest_iommu {
>       /* MMIO regs */
>       struct mmio_reg         reg_ctrl;              /* MMIO offset 0018h */
>       struct mmio_reg         reg_status;            /* MMIO offset 2020h */
> -    struct mmio_reg         reg_ext_feature;       /* MMIO offset 0030h */
> +    union amd_iommu_ext_features reg_ext_feature;  /* MMIO offset 0030h */
>   
>       /* guest interrupt settings */
>       struct guest_iommu_msi  msi;
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> @@ -346,26 +346,57 @@ struct amd_iommu_dte {
>   #define IOMMU_EXCLUSION_LIMIT_HIGH_MASK		0xFFFFFFFF
>   #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT	0
>   
> -/* Extended Feature Register*/
> +/* Extended Feature Register */
>   #define IOMMU_EXT_FEATURE_MMIO_OFFSET                   0x30
> -#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT                 0x0
> -#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT                  0x1
> -#define IOMMU_EXT_FEATURE_XTSUP_SHIFT                   0x2
> -#define IOMMU_EXT_FEATURE_NXSUP_SHIFT                   0x3
> -#define IOMMU_EXT_FEATURE_GTSUP_SHIFT                   0x4
> -#define IOMMU_EXT_FEATURE_IASUP_SHIFT                   0x6
> -#define IOMMU_EXT_FEATURE_GASUP_SHIFT                   0x7
> -#define IOMMU_EXT_FEATURE_HESUP_SHIFT                   0x8
> -#define IOMMU_EXT_FEATURE_PCSUP_SHIFT                   0x9
> -#define IOMMU_EXT_FEATURE_HATS_SHIFT                    0x10
> -#define IOMMU_EXT_FEATURE_HATS_MASK                     0x00000C00
> -#define IOMMU_EXT_FEATURE_GATS_SHIFT                    0x12
> -#define IOMMU_EXT_FEATURE_GATS_MASK                     0x00003000
> -#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT                  0x14
> -#define IOMMU_EXT_FEATURE_GLXSUP_MASK                   0x0000C000
>   
> -#define IOMMU_EXT_FEATURE_PASMAX_SHIFT                  0x0
> -#define IOMMU_EXT_FEATURE_PASMAX_MASK                   0x0000001F
> +union amd_iommu_ext_features {
> +    uint64_t raw;
> +    struct {
> +        unsigned int pref_sup:1;
> +        unsigned int ppr_sup:1;
> +        unsigned int xt_sup:1;
> +        unsigned int nx_sup:1;
> +        unsigned int gt_sup:1;
> +        unsigned int gappi_sup:1;
> +        unsigned int ia_sup:1;
> +        unsigned int ga_sup:1;
> +        unsigned int he_sup:1;
> +        unsigned int pc_sup:1;
> +        unsigned int hats:2;
> +        unsigned int gats:2;
> +        unsigned int glx_sup:2;
> +        unsigned int smif_sup:2;
> +        unsigned int smif_rc:3;
> +        unsigned int gam_sup:3;
> +        unsigned int dual_ppr_log_sup:2;
> +        unsigned int :2;
> +        unsigned int dual_event_log_sup:2;
> +        unsigned int :1;
> +        unsigned int sats_sup:1;
> +        unsigned int pas_max:5;
> +        unsigned int us_sup:1;
> +        unsigned int dev_tbl_seg_sup:2;
> +        unsigned int ppr_early_of_sup:1;
> +        unsigned int ppr_auto_rsp_sup:1;
> +        unsigned int marc_sup:2;
> +        unsigned int blk_stop_mrk_sup:1;
> +        unsigned int perf_opt_sup:1;
> +        unsigned int msi_cap_mmio_sup:1;
> +        unsigned int :1;
> +        unsigned int gio_sup:1;
> +        unsigned int ha_sup:1;
> +        unsigned int eph_sup:1;
> +        unsigned int attr_fw_sup:1;
> +        unsigned int hd_sup:1;
> +        unsigned int :1;
> +        unsigned int inv_iotlb_type_sup:1;
> +        unsigned int viommu_sup:1;
> +        unsigned int vm_guard_io_sup:1;
> +        unsigned int vm_table_size:4;
> +        unsigned int ga_update_dis_sup:1;
> +        unsigned int :2;
> +    } flds;
> +};
>   
>   /* Status Register*/
>   #define IOMMU_STATUS_MMIO_OFFSET		0x2020
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> @@ -218,13 +218,6 @@ static inline int iommu_has_cap(struct a
>       return !!(iommu->cap.header & (1u << bit));
>   }
>   
> -static inline int amd_iommu_has_feature(struct amd_iommu *iommu, uint32_t bit)
> -{
> -    if ( !iommu_has_cap(iommu, PCI_CAP_EFRSUP_SHIFT) )
> -        return 0;
> -    return !!(iommu->features & (1U << bit));
> -}
> -
>   /* access tail or head pointer of ring buffer */
>   static inline uint32_t iommu_get_rb_pointer(uint32_t reg)
>   {
> 

-- 
Brian Woods

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] Ping: [PATCH v4 01/12] AMD/IOMMU: use bit field for extended feature register
Posted by Jan Beulich 4 years, 8 months ago
On 25.07.2019 15:29, Jan Beulich wrote:
> This also takes care of several of the shift values wrongly having been
> specified as hex rather than dec.
> 
> Take the opportunity and
> - replace a readl() pair by a single readq(),
> - add further fields.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Brian, Suravee,

getting your ack here would unblock a fair part of the rest of
this series.

Thanks, Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 02/12] AMD/IOMMU: use bit field for control register
Posted by Jan Beulich 4 years, 8 months ago
Also introduce a field in struct amd_iommu caching the most recently
written control register. All writes should now happen exclusively from
that cached value, such that it is guaranteed to be up to date.

Take the opportunity to add further fields. Also convert a few boolean
function parameters to bool, such that use of !! can be avoided.

Because there are now definitions beyond bit 31, writel() also gets
replaced by writeq() when updating hardware.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v3: Switch boolean bitfields to bool.
v2: Add domain_id_pne field. Mention writel() -> writeq() change.

--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -317,7 +317,7 @@ static int do_invalidate_iotlb_pages(str
  
  static int do_completion_wait(struct domain *d, cmd_entry_t *cmd)
  {
-    bool_t com_wait_int_en, com_wait_int, i, s;
+    bool com_wait_int, i, s;
      struct guest_iommu *iommu;
      unsigned long gfn;
      p2m_type_t p2mt;
@@ -354,12 +354,10 @@ static int do_completion_wait(struct dom
          unmap_domain_page(vaddr);
      }
  
-    com_wait_int_en = iommu_get_bit(iommu->reg_ctrl.lo,
-                                    IOMMU_CONTROL_COMP_WAIT_INT_SHIFT);
      com_wait_int = iommu_get_bit(iommu->reg_status.lo,
                                   IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
  
-    if ( com_wait_int_en && com_wait_int )
+    if ( iommu->reg_ctrl.com_wait_int_en && com_wait_int )
          guest_iommu_deliver_msi(d);
  
      return 0;
@@ -521,40 +519,17 @@ static void guest_iommu_process_command(
      return;
  }
  
-static int guest_iommu_write_ctrl(struct guest_iommu *iommu, uint64_t newctrl)
+static int guest_iommu_write_ctrl(struct guest_iommu *iommu, uint64_t val)
  {
-    bool_t cmd_en, event_en, iommu_en, ppr_en, ppr_log_en;
-    bool_t cmd_en_old, event_en_old, iommu_en_old;
-    bool_t cmd_run;
-
-    iommu_en = iommu_get_bit(newctrl,
-                             IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT);
-    iommu_en_old = iommu_get_bit(iommu->reg_ctrl.lo,
-                                 IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT);
-
-    cmd_en = iommu_get_bit(newctrl,
-                           IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
-    cmd_en_old = iommu_get_bit(iommu->reg_ctrl.lo,
-                               IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
-    cmd_run = iommu_get_bit(iommu->reg_status.lo,
-                            IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT);
-    event_en = iommu_get_bit(newctrl,
-                             IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
-    event_en_old = iommu_get_bit(iommu->reg_ctrl.lo,
-                                 IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
-
-    ppr_en = iommu_get_bit(newctrl,
-                           IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-    ppr_log_en = iommu_get_bit(newctrl,
-                               IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
+    union amd_iommu_control newctrl = { .raw = val };
  
-    if ( iommu_en )
+    if ( newctrl.iommu_en )
      {
          guest_iommu_enable(iommu);
          guest_iommu_enable_dev_table(iommu);
      }
  
-    if ( iommu_en && cmd_en )
+    if ( newctrl.iommu_en && newctrl.cmd_buf_en )
      {
          guest_iommu_enable_ring_buffer(iommu, &iommu->cmd_buffer,
                                         sizeof(cmd_entry_t));
@@ -562,7 +537,7 @@ static int guest_iommu_write_ctrl(struct
          tasklet_schedule(&iommu->cmd_buffer_tasklet);
      }
  
-    if ( iommu_en && event_en )
+    if ( newctrl.iommu_en && newctrl.event_log_en )
      {
          guest_iommu_enable_ring_buffer(iommu, &iommu->event_log,
                                         sizeof(event_entry_t));
@@ -570,7 +545,7 @@ static int guest_iommu_write_ctrl(struct
          guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT);
      }
  
-    if ( iommu_en && ppr_en && ppr_log_en )
+    if ( newctrl.iommu_en && newctrl.ppr_en && newctrl.ppr_log_en )
      {
          guest_iommu_enable_ring_buffer(iommu, &iommu->ppr_log,
                                         sizeof(ppr_entry_t));
@@ -578,19 +553,21 @@ static int guest_iommu_write_ctrl(struct
          guest_iommu_clear_status(iommu, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT);
      }
  
-    if ( iommu_en && cmd_en_old && !cmd_en )
+    if ( newctrl.iommu_en && iommu->reg_ctrl.cmd_buf_en &&
+         !newctrl.cmd_buf_en )
      {
          /* Disable iommu command processing */
          tasklet_kill(&iommu->cmd_buffer_tasklet);
      }
  
-    if ( event_en_old && !event_en )
+    if ( iommu->reg_ctrl.event_log_en && !newctrl.event_log_en )
          guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT);
  
-    if ( iommu_en_old && !iommu_en )
+    if ( iommu->reg_ctrl.iommu_en && !newctrl.iommu_en )
          guest_iommu_disable(iommu);
  
-    u64_to_reg(&iommu->reg_ctrl, newctrl);
+    iommu->reg_ctrl = newctrl;
+
      return 0;
  }
  
@@ -632,7 +609,7 @@ static uint64_t iommu_mmio_read64(struct
          val = reg_to_u64(iommu->ppr_log.reg_tail);
          break;
      case IOMMU_CONTROL_MMIO_OFFSET:
-        val = reg_to_u64(iommu->reg_ctrl);
+        val = iommu->reg_ctrl.raw;
          break;
      case IOMMU_STATUS_MMIO_OFFSET:
          val = reg_to_u64(iommu->reg_status);
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -41,7 +41,7 @@ LIST_HEAD_READ_MOSTLY(amd_iommu_head);
  struct table_struct device_table;
  bool_t iommuv2_enabled;
  
-static int iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask)
+static bool iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask)
  {
      return iommu->ht_flags & mask;
  }
@@ -69,31 +69,18 @@ static void __init unmap_iommu_mmio_regi
  
  static void set_iommu_ht_flags(struct amd_iommu *iommu)
  {
-    u32 entry;
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
      /* Setup HT flags */
      if ( iommu_has_cap(iommu, PCI_CAP_HT_TUNNEL_SHIFT) )
-        iommu_has_ht_flag(iommu, ACPI_IVHD_TT_ENABLE) ?
-            iommu_set_bit(&entry, IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT) :
-            iommu_clear_bit(&entry, IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT);
-
-    iommu_has_ht_flag(iommu, ACPI_IVHD_RES_PASS_PW) ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT):
-        iommu_clear_bit(&entry, IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT);
-
-    iommu_has_ht_flag(iommu, ACPI_IVHD_ISOC) ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_ISOCHRONOUS_SHIFT):
-        iommu_clear_bit(&entry, IOMMU_CONTROL_ISOCHRONOUS_SHIFT);
-
-    iommu_has_ht_flag(iommu, ACPI_IVHD_PASS_PW) ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT):
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT);
+        iommu->ctrl.ht_tun_en = iommu_has_ht_flag(iommu, ACPI_IVHD_TT_ENABLE);
+
+    iommu->ctrl.pass_pw     = iommu_has_ht_flag(iommu, ACPI_IVHD_PASS_PW);
+    iommu->ctrl.res_pass_pw = iommu_has_ht_flag(iommu, ACPI_IVHD_RES_PASS_PW);
+    iommu->ctrl.isoc        = iommu_has_ht_flag(iommu, ACPI_IVHD_ISOC);
  
      /* Force coherent */
-    iommu_set_bit(&entry, IOMMU_CONTROL_COHERENT_SHIFT);
+    iommu->ctrl.coherent = true;
  
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  }
  
  static void register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
@@ -205,55 +192,37 @@ static void register_iommu_ppr_log_in_mm
  
  
  static void set_iommu_translation_control(struct amd_iommu *iommu,
-                                                 int enable)
+                                          bool enable)
  {
-    u32 entry;
+    iommu->ctrl.iommu_en = enable;
  
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    enable ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT) :
-        iommu_clear_bit(&entry, IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT);
-
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  }
  
  static void set_iommu_guest_translation_control(struct amd_iommu *iommu,
-                                                int enable)
+                                                bool enable)
  {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.gt_en = enable;
  
-    enable ?
-        iommu_set_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT) :
-        iommu_clear_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT);
-
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  
      if ( enable )
          AMD_IOMMU_DEBUG("Guest Translation Enabled.\n");
  }
  
  static void set_iommu_command_buffer_control(struct amd_iommu *iommu,
-                                                    int enable)
+                                             bool enable)
  {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    /*reset head and tail pointer manually before enablement */
+    /* Reset head and tail pointer manually before enablement */
      if ( enable )
      {
          writeq(0, iommu->mmio_base + IOMMU_CMD_BUFFER_HEAD_OFFSET);
          writeq(0, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
-
-        iommu_set_bit(&entry, IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
      }
-    else
-        iommu_clear_bit(&entry, IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT);
  
-    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.cmd_buf_en = enable;
+
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  }
  
  static void register_iommu_exclusion_range(struct amd_iommu *iommu)
@@ -295,57 +264,38 @@ static void register_iommu_exclusion_ran
  }
  
  static void set_iommu_event_log_control(struct amd_iommu *iommu,
-            int enable)
+                                        bool enable)
  {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    /*reset head and tail pointer manually before enablement */
+    /* Reset head and tail pointer manually before enablement */
      if ( enable )
      {
          writeq(0, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
          writeq(0, iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET);
-
-        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
-    }
-    else
-    {
-        iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT);
      }
  
-    iommu_clear_bit(&entry, IOMMU_CONTROL_COMP_WAIT_INT_SHIFT);
+    iommu->ctrl.event_int_en = enable;
+    iommu->ctrl.event_log_en = enable;
+    iommu->ctrl.com_wait_int_en = false;
  
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  }
  
  static void set_iommu_ppr_log_control(struct amd_iommu *iommu,
-                                      int enable)
+                                      bool enable)
  {
-    u32 entry;
-
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    /*reset head and tail pointer manually before enablement */
+    /* Reset head and tail pointer manually before enablement */
      if ( enable )
      {
          writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_HEAD_OFFSET);
          writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
-
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
-    }
-    else
-    {
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
      }
  
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.ppr_en = enable;
+    iommu->ctrl.ppr_int_en = enable;
+    iommu->ctrl.ppr_log_en = enable;
+
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+
      if ( enable )
          AMD_IOMMU_DEBUG("PPR Log Enabled.\n");
  }
@@ -398,7 +348,7 @@ static int iommu_read_log(struct amd_iom
  /* reset event log or ppr log when overflow */
  static void iommu_reset_log(struct amd_iommu *iommu,
                              struct ring_buffer *log,
-                            void (*ctrl_func)(struct amd_iommu *iommu, int))
+                            void (*ctrl_func)(struct amd_iommu *iommu, bool))
  {
      u32 entry;
      int log_run, run_bit;
@@ -615,11 +565,11 @@ static void iommu_check_event_log(struct
          iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
      else
      {
-        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
+        if ( !iommu->ctrl.event_int_en )
          {
-            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
-            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            iommu->ctrl.event_int_en = true;
+            writeq(iommu->ctrl.raw,
+                   iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
              /*
               * Re-schedule the tasklet to handle eventual log entries added
               * between reading the log above and re-enabling the interrupt.
@@ -704,11 +654,11 @@ static void iommu_check_ppr_log(struct a
          iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
      else
      {
-        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-        if ( !(entry & IOMMU_CONTROL_PPR_LOG_INT_MASK) )
+        if ( !iommu->ctrl.ppr_int_en )
          {
-            entry |= IOMMU_CONTROL_PPR_LOG_INT_MASK;
-            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            iommu->ctrl.ppr_int_en = true;
+            writeq(iommu->ctrl.raw,
+                   iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
              /*
               * Re-schedule the tasklet to handle eventual log entries added
               * between reading the log above and re-enabling the interrupt.
@@ -754,7 +704,6 @@ static void do_amd_iommu_irq(unsigned lo
  static void iommu_interrupt_handler(int irq, void *dev_id,
                                      struct cpu_user_regs *regs)
  {
-    u32 entry;
      unsigned long flags;
      struct amd_iommu *iommu = dev_id;
  
@@ -764,10 +713,9 @@ static void iommu_interrupt_handler(int
       * Silence interrupts from both event and PPR by clearing the
       * enable logging bits in the control register
       */
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu->ctrl.event_int_en = false;
+    iommu->ctrl.ppr_int_en = false;
+    writeq(iommu->ctrl.raw, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  
      spin_unlock_irqrestore(&iommu->lock, flags);
  
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -88,6 +88,8 @@ struct amd_iommu {
      void *mmio_base;
      unsigned long mmio_base_phys;
  
+    union amd_iommu_control ctrl;
+
      struct table_struct dev_table;
      struct ring_buffer cmd_buffer;
      struct ring_buffer event_log;
@@ -173,7 +175,7 @@ struct guest_iommu {
      uint64_t                mmio_base;             /* MMIO base address */
  
      /* MMIO regs */
-    struct mmio_reg         reg_ctrl;              /* MMIO offset 0018h */
+    union amd_iommu_control reg_ctrl;              /* MMIO offset 0018h */
      struct mmio_reg         reg_status;            /* MMIO offset 2020h */
      union amd_iommu_ext_features reg_ext_feature;  /* MMIO offset 0030h */
  
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -295,38 +295,56 @@ struct amd_iommu_dte {
  
  /* Control Register */
  #define IOMMU_CONTROL_MMIO_OFFSET			0x18
-#define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK		0x00000001
-#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT		0
-#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK	0x00000002
-#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT	1
-#define IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK		0x00000004
-#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT		2
-#define IOMMU_CONTROL_EVENT_LOG_INT_MASK		0x00000008
-#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT		3
-#define IOMMU_CONTROL_COMP_WAIT_INT_MASK		0x00000010
-#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT		4
-#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_MASK		0x000000E0
-#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT	5
-#define IOMMU_CONTROL_PASS_POSTED_WRITE_MASK		0x00000100
-#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT		8
-#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK	0x00000200
-#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT	9
-#define IOMMU_CONTROL_COHERENT_MASK			0x00000400
-#define IOMMU_CONTROL_COHERENT_SHIFT			10
-#define IOMMU_CONTROL_ISOCHRONOUS_MASK			0x00000800
-#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
-#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
-#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_MASK		0x00002000
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
-#define IOMMU_CONTROL_PPR_LOG_INT_MASK			0x00004000
-#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
-#define IOMMU_CONTROL_PPR_ENABLE_MASK			0x00008000
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
-#define IOMMU_CONTROL_GT_ENABLE_MASK			0x00010000
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
-#define IOMMU_CONTROL_RESTART_MASK			0x80000000
-#define IOMMU_CONTROL_RESTART_SHIFT			31
+
+union amd_iommu_control {
+    uint64_t raw;
+    struct {
+        bool iommu_en:1;
+        bool ht_tun_en:1;
+        bool event_log_en:1;
+        bool event_int_en:1;
+        bool com_wait_int_en:1;
+        unsigned int inv_timeout:3;
+        bool pass_pw:1;
+        bool res_pass_pw:1;
+        bool coherent:1;
+        bool isoc:1;
+        bool cmd_buf_en:1;
+        bool ppr_log_en:1;
+        bool ppr_int_en:1;
+        bool ppr_en:1;
+        bool gt_en:1;
+        bool ga_en:1;
+        unsigned int crw:4;
+        bool smif_en:1;
+        bool slf_wb_dis:1;
+        bool smif_log_en:1;
+        unsigned int gam_en:3;
+        bool ga_log_en:1;
+        bool ga_int_en:1;
+        unsigned int dual_ppr_log_en:2;
+        unsigned int dual_event_log_en:2;
+        unsigned int dev_tbl_seg_en:3;
+        unsigned int priv_abrt_en:2;
+        bool ppr_auto_rsp_en:1;
+        bool marc_en:1;
+        bool blk_stop_mrk_en:1;
+        bool ppr_auto_rsp_aon:1;
+        bool domain_id_pne:1;
+        unsigned int :1;
+        bool eph_en:1;
+        unsigned int had_update:2;
+        bool gd_update_dis:1;
+        unsigned int :1;
+        bool xt_en:1;
+        bool int_cap_xt_en:1;
+        bool vcmd_en:1;
+        bool viommu_en:1;
+        bool ga_update_dis:1;
+        bool gappi_en:1;
+        unsigned int :8;
+    };
+};
  
  /* Exclusion Register */
  #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 03/12] AMD/IOMMU: use bit field for IRTE
Posted by Jan Beulich 4 years, 8 months ago
At the same time restrict its scope to just the single source file
actually using it, and abstract accesses by introducing a union of
pointers. (A union of the actual table entries is deliberately not used,
so as to make it impossible to [wrongly, once the 128-bit form gets
added] perform pointer arithmetic / array accesses on derived types.)

Also move away from updating the entries piecemeal: Construct a full new
entry, and write it out.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v4: Re-base. Do away with standalone struct irte_basic.
v3: Switch boolean bitfields to bool.
v2: name {get,free}_intremap_entry()'s last parameter "index" instead of
     "offset". Introduce union irte32.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -24,6 +24,26 @@
  #include <xen/keyhandler.h>
  #include <xen/softirq.h>
  
+union irte32 {
+    uint32_t raw;
+    struct {
+        bool remap_en:1;
+        bool sup_io_pf:1;
+        unsigned int int_type:3;
+        bool rq_eoi:1;
+        bool dm:1;
+        bool guest_mode:1; /* MBZ */
+        unsigned int dest:8;
+        unsigned int vector:8;
+        unsigned int :8;
+    } flds;
+};
+
+union irte_ptr {
+    void *ptr;
+    union irte32 *ptr32;
+};
+
  #define INTREMAP_TABLE_ORDER    1
  #define INTREMAP_LENGTH 0xB
  #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
@@ -102,47 +122,45 @@ static unsigned int alloc_intremap_entry
      return slot;
  }
  
-static u32 *get_intremap_entry(int seg, int bdf, int offset)
+static union irte_ptr get_intremap_entry(unsigned int seg, unsigned int bdf,
+                                         unsigned int index)
  {
-    u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
+    union irte_ptr table = {
+        .ptr = get_ivrs_mappings(seg)[bdf].intremap_table
+    };
+
+    ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
  
-    ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
+    table.ptr32 += index;
  
-    return table + offset;
+    return table;
  }
  
-static void free_intremap_entry(int seg, int bdf, int offset)
-{
-    u32 *entry = get_intremap_entry(seg, bdf, offset);
-
-    memset(entry, 0, sizeof(u32));
-    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
-}
-
-static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
-    u8 dest_mode, u8 dest)
-{
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                            INT_REMAP_ENTRY_REMAPEN_MASK,
-                            INT_REMAP_ENTRY_REMAPEN_SHIFT, entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, *entry,
-                            INT_REMAP_ENTRY_SUPIOPF_MASK,
-                            INT_REMAP_ENTRY_SUPIOPF_SHIFT, entry);
-    set_field_in_reg_u32(int_type, *entry,
-                            INT_REMAP_ENTRY_INTTYPE_MASK,
-                            INT_REMAP_ENTRY_INTTYPE_SHIFT, entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, *entry,
-                            INT_REMAP_ENTRY_REQEOI_MASK,
-                            INT_REMAP_ENTRY_REQEOI_SHIFT, entry);
-    set_field_in_reg_u32((u32)dest_mode, *entry,
-                            INT_REMAP_ENTRY_DM_MASK,
-                            INT_REMAP_ENTRY_DM_SHIFT, entry);
-    set_field_in_reg_u32((u32)dest, *entry,
-                            INT_REMAP_ENTRY_DEST_MAST,
-                            INT_REMAP_ENTRY_DEST_SHIFT, entry);
-    set_field_in_reg_u32((u32)vector, *entry,
-                            INT_REMAP_ENTRY_VECTOR_MASK,
-                            INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
+static void free_intremap_entry(unsigned int seg, unsigned int bdf,
+                                unsigned int index)
+{
+    union irte_ptr entry = get_intremap_entry(seg, bdf, index);
+
+    ACCESS_ONCE(entry.ptr32->raw) = 0;
+
+    __clear_bit(index, get_ivrs_mappings(seg)[bdf].intremap_inuse);
+}
+
+static void update_intremap_entry(union irte_ptr entry, unsigned int vector,
+                                  unsigned int int_type,
+                                  unsigned int dest_mode, unsigned int dest)
+{
+    union irte32 irte = {
+        .flds = {
+            .remap_en = true,
+            .int_type = int_type,
+            .dm = dest_mode,
+            .dest = dest,
+            .vector = vector,
+        },
+    };
+
+    ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
  }
  
  static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -164,7 +182,7 @@ static int update_intremap_entry_from_io
      u16 *index)
  {
      unsigned long flags;
-    u32* entry;
+    union irte_ptr entry;
      u8 delivery_mode, dest, vector, dest_mode;
      int req_id;
      spinlock_t *lock;
@@ -202,12 +220,8 @@ static int update_intremap_entry_from_io
           * so need to recover vector and delivery mode from IRTE.
           */
          ASSERT(get_rte_index(rte) == offset);
-        vector = get_field_from_reg_u32(*entry,
-                                        INT_REMAP_ENTRY_VECTOR_MASK,
-                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
-        delivery_mode = get_field_from_reg_u32(*entry,
-                                               INT_REMAP_ENTRY_INTTYPE_MASK,
-                                               INT_REMAP_ENTRY_INTTYPE_SHIFT);
+        vector = entry.ptr32->flds.vector;
+        delivery_mode = entry.ptr32->flds.int_type;
      }
      update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
  
@@ -229,7 +243,7 @@ int __init amd_iommu_setup_ioapic_remapp
  {
      struct IO_APIC_route_entry rte;
      unsigned long flags;
-    u32* entry;
+    union irte_ptr entry;
      int apic, pin;
      u8 delivery_mode, dest, vector, dest_mode;
      u16 seg, bdf, req_id;
@@ -408,16 +422,14 @@ unsigned int amd_iommu_read_ioapic_from_
          u16 bdf = ioapic_sbdf[idx].bdf;
          u16 seg = ioapic_sbdf[idx].seg;
          u16 req_id = get_intremap_requestor_id(seg, bdf);
-        const u32 *entry = get_intremap_entry(seg, req_id, offset);
+        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
  
          ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
          val &= ~(INTREMAP_ENTRIES - 1);
-        val |= get_field_from_reg_u32(*entry,
-                                      INT_REMAP_ENTRY_INTTYPE_MASK,
-                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
-        val |= get_field_from_reg_u32(*entry,
-                                      INT_REMAP_ENTRY_VECTOR_MASK,
-                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
+        val |= MASK_INSR(entry.ptr32->flds.int_type,
+                         IO_APIC_REDIR_DELIV_MODE_MASK);
+        val |= MASK_INSR(entry.ptr32->flds.vector,
+                         IO_APIC_REDIR_VECTOR_MASK);
      }
  
      return val;
@@ -428,7 +440,7 @@ static int update_intremap_entry_from_ms
      int *remap_index, const struct msi_msg *msg, u32 *data)
  {
      unsigned long flags;
-    u32* entry;
+    union irte_ptr entry;
      u16 req_id, alias_id;
      u8 delivery_mode, dest, vector, dest_mode;
      spinlock_t *lock;
@@ -582,7 +594,7 @@ void amd_iommu_read_msi_from_ire(
      const struct pci_dev *pdev = msi_desc->dev;
      u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
      u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
-    const u32 *entry;
+    union irte_ptr entry;
  
      if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
          return;
@@ -598,12 +610,10 @@ void amd_iommu_read_msi_from_ire(
      }
  
      msg->data &= ~(INTREMAP_ENTRIES - 1);
-    msg->data |= get_field_from_reg_u32(*entry,
-                                        INT_REMAP_ENTRY_INTTYPE_MASK,
-                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
-    msg->data |= get_field_from_reg_u32(*entry,
-                                        INT_REMAP_ENTRY_VECTOR_MASK,
-                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
+    msg->data |= MASK_INSR(entry.ptr32->flds.int_type,
+                           MSI_DATA_DELIVERY_MODE_MASK);
+    msg->data |= MASK_INSR(entry.ptr32->flds.vector,
+                           MSI_DATA_VECTOR_MASK);
  }
  
  int __init amd_iommu_free_intremap_table(
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -469,22 +469,6 @@ struct amd_iommu_pte {
  #define IOMMU_CONTROL_DISABLED	0
  #define IOMMU_CONTROL_ENABLED	1
  
-/* interrupt remapping table */
-#define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
-#define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
-#define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
-#define INT_REMAP_ENTRY_SUPIOPF_SHIFT   1
-#define INT_REMAP_ENTRY_INTTYPE_MASK    0x0000001C
-#define INT_REMAP_ENTRY_INTTYPE_SHIFT   2
-#define INT_REMAP_ENTRY_REQEOI_MASK     0x00000020
-#define INT_REMAP_ENTRY_REQEOI_SHIFT    5
-#define INT_REMAP_ENTRY_DM_MASK         0x00000040
-#define INT_REMAP_ENTRY_DM_SHIFT        6
-#define INT_REMAP_ENTRY_DEST_MAST       0x0000FF00
-#define INT_REMAP_ENTRY_DEST_SHIFT      8
-#define INT_REMAP_ENTRY_VECTOR_MASK     0x00FF0000
-#define INT_REMAP_ENTRY_VECTOR_SHIFT    16
-
  #define INV_IOMMU_ALL_PAGES_ADDRESS      ((1ULL << 63) - 1)
  
  #define IOMMU_RING_BUFFER_PTR_MASK                  0x0007FFF0

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 04/12] AMD/IOMMU: pass IOMMU to {get, free, update}_intremap_entry()
Posted by Jan Beulich 4 years, 8 months ago
The functions will want to know IOMMU properties (specifically the IRTE
size) subsequently.

Rather than introducing a second error path bogusly returning -E... from
amd_iommu_read_ioapic_from_ire(), also change the existing one to follow
VT-d in returning the raw (untranslated) IO-APIC RTE.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v3: New.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -122,11 +122,11 @@ static unsigned int alloc_intremap_entry
      return slot;
  }
  
-static union irte_ptr get_intremap_entry(unsigned int seg, unsigned int bdf,
-                                         unsigned int index)
+static union irte_ptr get_intremap_entry(const struct amd_iommu *iommu,
+                                         unsigned int bdf, unsigned int index)
  {
      union irte_ptr table = {
-        .ptr = get_ivrs_mappings(seg)[bdf].intremap_table
+        .ptr = get_ivrs_mappings(iommu->seg)[bdf].intremap_table
      };
  
      ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
@@ -136,18 +136,19 @@ static union irte_ptr get_intremap_entry
      return table;
  }
  
-static void free_intremap_entry(unsigned int seg, unsigned int bdf,
-                                unsigned int index)
+static void free_intremap_entry(const struct amd_iommu *iommu,
+                                unsigned int bdf, unsigned int index)
  {
-    union irte_ptr entry = get_intremap_entry(seg, bdf, index);
+    union irte_ptr entry = get_intremap_entry(iommu, bdf, index);
  
      ACCESS_ONCE(entry.ptr32->raw) = 0;
  
-    __clear_bit(index, get_ivrs_mappings(seg)[bdf].intremap_inuse);
+    __clear_bit(index, get_ivrs_mappings(iommu->seg)[bdf].intremap_inuse);
  }
  
-static void update_intremap_entry(union irte_ptr entry, unsigned int vector,
-                                  unsigned int int_type,
+static void update_intremap_entry(const struct amd_iommu *iommu,
+                                  union irte_ptr entry,
+                                  unsigned int vector, unsigned int int_type,
                                    unsigned int dest_mode, unsigned int dest)
  {
      union irte32 irte = {
@@ -212,7 +213,7 @@ static int update_intremap_entry_from_io
          lo_update = 1;
      }
  
-    entry = get_intremap_entry(iommu->seg, req_id, offset);
+    entry = get_intremap_entry(iommu, req_id, offset);
      if ( !lo_update )
      {
          /*
@@ -223,7 +224,7 @@ static int update_intremap_entry_from_io
          vector = entry.ptr32->flds.vector;
          delivery_mode = entry.ptr32->flds.int_type;
      }
-    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+    update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
  
      spin_unlock_irqrestore(lock, flags);
  
@@ -288,8 +289,8 @@ int __init amd_iommu_setup_ioapic_remapp
              spin_lock_irqsave(lock, flags);
              offset = alloc_intremap_entry(seg, req_id, 1);
              BUG_ON(offset >= INTREMAP_ENTRIES);
-            entry = get_intremap_entry(iommu->seg, req_id, offset);
-            update_intremap_entry(entry, vector,
+            entry = get_intremap_entry(iommu, req_id, offset);
+            update_intremap_entry(iommu, entry, vector,
                                    delivery_mode, dest_mode, dest);
              spin_unlock_irqrestore(lock, flags);
  
@@ -413,7 +414,7 @@ unsigned int amd_iommu_read_ioapic_from_
  
      idx = ioapic_id_to_index(IO_APIC_ID(apic));
      if ( idx == MAX_IO_APICS )
-        return -EINVAL;
+        return val;
  
      offset = ioapic_sbdf[idx].pin_2_idx[pin];
  
@@ -422,9 +423,13 @@ unsigned int amd_iommu_read_ioapic_from_
          u16 bdf = ioapic_sbdf[idx].bdf;
          u16 seg = ioapic_sbdf[idx].seg;
          u16 req_id = get_intremap_requestor_id(seg, bdf);
-        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
+        const struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
+        union irte_ptr entry;
  
+        if ( !iommu )
+            return val;
          ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
+        entry = get_intremap_entry(iommu, req_id, offset);
          val &= ~(INTREMAP_ENTRIES - 1);
          val |= MASK_INSR(entry.ptr32->flds.int_type,
                           IO_APIC_REDIR_DELIV_MODE_MASK);
@@ -454,7 +459,7 @@ static int update_intremap_entry_from_ms
          lock = get_intremap_lock(iommu->seg, req_id);
          spin_lock_irqsave(lock, flags);
          for ( i = 0; i < nr; ++i )
-            free_intremap_entry(iommu->seg, req_id, *remap_index + i);
+            free_intremap_entry(iommu, req_id, *remap_index + i);
          spin_unlock_irqrestore(lock, flags);
          goto done;
      }
@@ -479,8 +484,8 @@ static int update_intremap_entry_from_ms
          *remap_index = offset;
      }
  
-    entry = get_intremap_entry(iommu->seg, req_id, offset);
-    update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
+    entry = get_intremap_entry(iommu, req_id, offset);
+    update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
      spin_unlock_irqrestore(lock, flags);
  
      *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
@@ -594,12 +599,13 @@ void amd_iommu_read_msi_from_ire(
      const struct pci_dev *pdev = msi_desc->dev;
      u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
      u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+    const struct amd_iommu *iommu = _find_iommu_for_device(seg, bdf);
      union irte_ptr entry;
  
-    if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+    if ( IS_ERR_OR_NULL(iommu) )
          return;
  
-    entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+    entry = get_intremap_entry(iommu, get_dma_requestor_id(seg, bdf), offset);
  
      if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
      {

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 05/12] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
Posted by Jan Beulich 4 years, 8 months ago
This is in preparation for actually enabling x2APIC mode, which requires
this wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): The bypass there (writing a masked RTE
straight to the IO-APIC when no IRTE has been allocated for the pin yet)
was introduced for XSA-36, i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old
entries in remapping tables when creating new one"). Other code
introduced by that change has meanwhile disappeared or changed further,
and I wonder whether - rather than adding an x2apic_enabled check to the
conditional - the bypass couldn't be deleted altogether. For now the goal
is to affect the non-x2APIC paths as little as possible.

Take the liberty of using the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Re-base. Do away with standalone struct irte_full. Use smp_wmb().
v3: Avoid unrelated type changes in update_intremap_entry_from_ioapic().
     Drop irte_mode enum and variable. Convert INTREMAP_TABLE_ORDER into
     a static helper. Comment barrier() uses. Switch boolean bitfields to
     bool.
v2: Add cast in get_full_dest(). Re-base over changes earlier in the
     series. Don't use cmpxchg16b. Use barrier() instead of wmb().
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -39,12 +39,36 @@ union irte32 {
      } flds;
  };
  
+union irte128 {
+    uint64_t raw[2];
+    struct {
+        bool remap_en:1;
+        bool sup_io_pf:1;
+        unsigned int int_type:3;
+        bool rq_eoi:1;
+        bool dm:1;
+        bool guest_mode:1; /* MBZ */
+        unsigned int dest_lo:24;
+        unsigned int :32;
+        unsigned int vector:8;
+        unsigned int :24;
+        unsigned int :24;
+        unsigned int dest_hi:8;
+    } full;
+};
+
  union irte_ptr {
      void *ptr;
      union irte32 *ptr32;
+    union irte128 *ptr128;
  };
  
-#define INTREMAP_TABLE_ORDER    1
+union irte_cptr {
+    const void *ptr;
+    const union irte32 *ptr32;
+    const union irte128 *ptr128;
+} __transparent__;
+
  #define INTREMAP_LENGTH 0xB
  #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
  
@@ -57,6 +81,13 @@ unsigned int nr_ioapic_sbdf;
  
  static void dump_intremap_tables(unsigned char key);
  
+static unsigned int __init intremap_table_order(const struct amd_iommu *iommu)
+{
+    return iommu->ctrl.ga_en
+           ? get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte128))
+           : get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte32));
+}
+
  unsigned int ioapic_id_to_index(unsigned int apic_id)
  {
      unsigned int idx;
@@ -131,7 +162,10 @@ static union irte_ptr get_intremap_entry
  
      ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
  
-    table.ptr32 += index;
+    if ( iommu->ctrl.ga_en )
+        table.ptr128 += index;
+    else
+        table.ptr32 += index;
  
      return table;
  }
@@ -141,7 +175,22 @@ static void free_intremap_entry(const st
  {
      union irte_ptr entry = get_intremap_entry(iommu, bdf, index);
  
-    ACCESS_ONCE(entry.ptr32->raw) = 0;
+    if ( iommu->ctrl.ga_en )
+    {
+        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        /*
+         * Low half (containing RemapEn) needs to be cleared first.  Note that
+         * strictly speaking smp_wmb() isn't enough, as conceptually it expands
+         * to just barrier() when !CONFIG_SMP.  But wmb() would be more than we
+         * need, since the IOMMU is a cache-coherent entity on the bus.  And
+         * given that we don't allow CONFIG_SMP to be turned off, the SMP
+         * variant will do.
+         */
+        smp_wmb();
+        entry.ptr128->raw[1] = 0;
+    }
+    else
+        ACCESS_ONCE(entry.ptr32->raw) = 0;
  
      __clear_bit(index, get_ivrs_mappings(iommu->seg)[bdf].intremap_inuse);
  }
@@ -151,17 +200,44 @@ static void update_intremap_entry(const
                                    unsigned int vector, unsigned int int_type,
                                    unsigned int dest_mode, unsigned int dest)
  {
-    union irte32 irte = {
-        .flds = {
-            .remap_en = true,
-            .int_type = int_type,
-            .dm = dest_mode,
-            .dest = dest,
-            .vector = vector,
-        },
-    };
+    if ( iommu->ctrl.ga_en )
+    {
+        union irte128 irte = {
+            .full = {
+                .remap_en = true,
+                .int_type = int_type,
+                .dm = dest_mode,
+                .dest_lo = dest,
+                .dest_hi = dest >> 24,
+                .vector = vector,
+            },
+        };
+
+        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        /*
+         * Low half, in particular RemapEn, needs to be cleared first.  See
+         * comment in free_intremap_entry() regarding the choice of barrier.
+         */
+        smp_wmb();
+        entry.ptr128->raw[1] = irte.raw[1];
+        /* High half needs to be set before low one (containing RemapEn). */
+        smp_wmb();
+        ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
+    }
+    else
+    {
+        union irte32 irte = {
+            .flds = {
+                .remap_en = true,
+                .int_type = int_type,
+                .dm = dest_mode,
+                .dest = dest,
+                .vector = vector,
+            },
+        };
  
-    ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
+        ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
+    }
  }
  
  static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -175,6 +251,11 @@ static inline void set_rte_index(struct
      rte->delivery_mode = offset >> 8;
  }
  
+static inline unsigned int get_full_dest(const union irte128 *entry)
+{
+    return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
+}
+
  static int update_intremap_entry_from_ioapic(
      int bdf,
      struct amd_iommu *iommu,
@@ -184,10 +265,11 @@ static int update_intremap_entry_from_io
  {
      unsigned long flags;
      union irte_ptr entry;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
      int req_id;
      spinlock_t *lock;
-    unsigned int offset;
+    unsigned int dest, offset;
+    bool fresh = false;
  
      req_id = get_intremap_requestor_id(iommu->seg, bdf);
      lock = get_intremap_lock(iommu->seg, req_id);
@@ -195,7 +277,7 @@ static int update_intremap_entry_from_io
      delivery_mode = rte->delivery_mode;
      vector = rte->vector;
      dest_mode = rte->dest_mode;
-    dest = rte->dest.logical.logical_dest;
+    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
  
      spin_lock_irqsave(lock, flags);
  
@@ -210,25 +292,40 @@ static int update_intremap_entry_from_io
              return -ENOSPC;
          }
          *index = offset;
-        lo_update = 1;
+        fresh = true;
      }
  
      entry = get_intremap_entry(iommu, req_id, offset);
-    if ( !lo_update )
+    if ( fresh )
+        /* nothing */;
+    else if ( !lo_update )
      {
          /*
           * Low half of incoming RTE is already in remapped format,
           * so need to recover vector and delivery mode from IRTE.
           */
          ASSERT(get_rte_index(rte) == offset);
-        vector = entry.ptr32->flds.vector;
+        if ( iommu->ctrl.ga_en )
+            vector = entry.ptr128->full.vector;
+        else
+            vector = entry.ptr32->flds.vector;
+        /* The IntType fields match for both formats. */
          delivery_mode = entry.ptr32->flds.int_type;
      }
+    else if ( x2apic_enabled )
+    {
+        /*
+         * High half of incoming RTE was read from the I/O APIC and hence may
+         * not hold the full destination, so need to recover full destination
+         * from IRTE.
+         */
+        dest = get_full_dest(entry.ptr128);
+    }
      update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
  
      spin_unlock_irqrestore(lock, flags);
  
-    if ( iommu->enabled )
+    if ( iommu->enabled && !fresh )
      {
          spin_lock_irqsave(&iommu->lock, flags);
          amd_iommu_flush_intremap(iommu, req_id);
@@ -286,6 +383,18 @@ int __init amd_iommu_setup_ioapic_remapp
              dest_mode = rte.dest_mode;
              dest = rte.dest.logical.logical_dest;
  
+            if ( iommu->ctrl.xt_en )
+            {
+                /*
+                 * In x2APIC mode we have no way of discovering the high 24
+                 * bits of the destination of an already enabled interrupt.
+                 * We come here earlier than for xAPIC mode, so no interrupts
+                 * should have been set up before.
+                 */
+                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+                                IO_APIC_ID(apic), pin);
+            }
+
              spin_lock_irqsave(lock, flags);
              offset = alloc_intremap_entry(seg, req_id, 1);
              BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -320,7 +429,8 @@ void amd_iommu_ioapic_update_ire(
      struct IO_APIC_route_entry new_rte = { 0 };
      unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
      unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf, rc;
+    int seg, bdf, rc;
+    bool saved_mask, fresh = false;
      struct amd_iommu *iommu;
      unsigned int idx;
  
@@ -362,12 +472,22 @@ void amd_iommu_ioapic_update_ire(
          *(((u32 *)&new_rte) + 1) = value;
      }
  
-    if ( new_rte.mask &&
-         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
      {
          ASSERT(saved_mask);
-        __io_apic_write(apic, reg, value);
-        return;
+
+        /*
+         * There's nowhere except the IRTE to store a full 32-bit destination,
+         * so we may not bypass entry allocation and updating of the low RTE
+         * half in the (usual) case of the high RTE half getting written first.
+         */
+        if ( new_rte.mask && !x2apic_enabled )
+        {
+            __io_apic_write(apic, reg, value);
+            return;
+        }
+
+        fresh = true;
      }
  
      /* mask the interrupt while we change the intremap table */
@@ -396,8 +516,12 @@ void amd_iommu_ioapic_update_ire(
      if ( reg == rte_lo )
          return;
  
-    /* unmask the interrupt after we have updated the intremap table */
-    if ( !saved_mask )
+    /*
+     * Unmask the interrupt after we have updated the intremap table. Also
+     * write the low half if a fresh entry was allocated for a high half
+     * update in x2APIC mode.
+     */
+    if ( !saved_mask || (x2apic_enabled && fresh) )
      {
          old_rte.mask = saved_mask;
          __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -411,31 +535,40 @@ unsigned int amd_iommu_read_ioapic_from_
      unsigned int offset;
      unsigned int val = __io_apic_read(apic, reg);
      unsigned int pin = (reg - 0x10) / 2;
+    uint16_t seg, bdf, req_id;
+    const struct amd_iommu *iommu;
+    union irte_ptr entry;
  
      idx = ioapic_id_to_index(IO_APIC_ID(apic));
      if ( idx == MAX_IO_APICS )
          return val;
  
      offset = ioapic_sbdf[idx].pin_2_idx[pin];
+    if ( offset >= INTREMAP_ENTRIES )
+        return val;
  
-    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
-    {
-        u16 bdf = ioapic_sbdf[idx].bdf;
-        u16 seg = ioapic_sbdf[idx].seg;
-        u16 req_id = get_intremap_requestor_id(seg, bdf);
-        const struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-        union irte_ptr entry;
+    seg = ioapic_sbdf[idx].seg;
+    bdf = ioapic_sbdf[idx].bdf;
+    iommu = find_iommu_for_device(seg, bdf);
+    if ( !iommu )
+        return val;
+    req_id = get_intremap_requestor_id(seg, bdf);
+    entry = get_intremap_entry(iommu, req_id, offset);
  
-        if ( !iommu )
-            return val;
+    if ( !(reg & 1) )
+    {
          ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
-        entry = get_intremap_entry(iommu, req_id, offset);
          val &= ~(INTREMAP_ENTRIES - 1);
+        /* The IntType fields match for both formats. */
          val |= MASK_INSR(entry.ptr32->flds.int_type,
                           IO_APIC_REDIR_DELIV_MODE_MASK);
-        val |= MASK_INSR(entry.ptr32->flds.vector,
+        val |= MASK_INSR(iommu->ctrl.ga_en
+                         ? entry.ptr128->full.vector
+                         : entry.ptr32->flds.vector,
                           IO_APIC_REDIR_VECTOR_MASK);
      }
+    else if ( x2apic_enabled )
+        val = get_full_dest(entry.ptr128);
  
      return val;
  }
@@ -447,9 +580,9 @@ static int update_intremap_entry_from_ms
      unsigned long flags;
      union irte_ptr entry;
      u16 req_id, alias_id;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
      spinlock_t *lock;
-    unsigned int offset, i;
+    unsigned int dest, offset, i;
  
      req_id = get_dma_requestor_id(iommu->seg, bdf);
      alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -470,7 +603,12 @@ static int update_intremap_entry_from_ms
      dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
      delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
      vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
-    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+    if ( x2apic_enabled )
+        dest = msg->dest32;
+    else
+        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
      offset = *remap_index;
      if ( offset >= INTREMAP_ENTRIES )
      {
@@ -616,10 +754,21 @@ void amd_iommu_read_msi_from_ire(
      }
  
      msg->data &= ~(INTREMAP_ENTRIES - 1);
+    /* The IntType fields match for both formats. */
      msg->data |= MASK_INSR(entry.ptr32->flds.int_type,
                             MSI_DATA_DELIVERY_MODE_MASK);
-    msg->data |= MASK_INSR(entry.ptr32->flds.vector,
-                           MSI_DATA_VECTOR_MASK);
+    if ( iommu->ctrl.ga_en )
+    {
+        msg->data |= MASK_INSR(entry.ptr128->full.vector,
+                               MSI_DATA_VECTOR_MASK);
+        msg->dest32 = get_full_dest(entry.ptr128);
+    }
+    else
+    {
+        msg->data |= MASK_INSR(entry.ptr32->flds.vector,
+                               MSI_DATA_VECTOR_MASK);
+        msg->dest32 = entry.ptr32->flds.dest;
+    }
  }
  
  int __init amd_iommu_free_intremap_table(
@@ -631,7 +780,7 @@ int __init amd_iommu_free_intremap_table
  
      if ( tb )
      {
-        __free_amd_iommu_tables(tb, INTREMAP_TABLE_ORDER);
+        __free_amd_iommu_tables(tb, intremap_table_order(iommu));
          ivrs_mapping->intremap_table = NULL;
      }
  
@@ -641,10 +790,10 @@ int __init amd_iommu_free_intremap_table
  void *__init amd_iommu_alloc_intremap_table(
      const struct amd_iommu *iommu, unsigned long **inuse_map)
  {
-    void *tb;
-    tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
+    void *tb = __alloc_amd_iommu_tables(intremap_table_order(iommu));
+
      BUG_ON(tb == NULL);
-    memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    memset(tb, 0, PAGE_SIZE << intremap_table_order(iommu));
      *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
      BUG_ON(*inuse_map == NULL);
      return tb;
@@ -685,18 +834,29 @@ int __init amd_setup_hpet_msi(struct msi
      return rc;
  }
  
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const struct amd_iommu *iommu,
+                                union irte_cptr tbl)
  {
-    u32 count;
+    unsigned int count;
  
-    if ( !table )
+    if ( !tbl.ptr )
          return;
  
      for ( count = 0; count < INTREMAP_ENTRIES; count++ )
      {
-        if ( !table[count] )
-            continue;
-        printk("    IRTE[%03x] %08x\n", count, table[count]);
+        if ( iommu->ctrl.ga_en )
+        {
+            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
+                continue;
+            printk("    IRTE[%03x] %016lx_%016lx\n",
+                   count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
+        }
+        else
+        {
+            if ( !tbl.ptr32[count].raw )
+                continue;
+            printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
+        }
      }
  }
  
@@ -714,7 +874,7 @@ static int dump_intremap_mapping(const s
             PCI_FUNC(ivrs_mapping->dte_requestor_id));
  
      spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
-    dump_intremap_table(ivrs_mapping->intremap_table);
+    dump_intremap_table(iommu, ivrs_mapping->intremap_table);
      spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
  
      process_pending_softirqs();
@@ -733,6 +893,8 @@ static void dump_intremap_tables(unsigne
      printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
  
      spin_lock_irqsave(&shared_intremap_lock, flags);
-    dump_intremap_table(shared_intremap_table);
+    dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
+                                         list),
+                        shared_intremap_table);
      spin_unlock_irqrestore(&shared_intremap_lock, flags);
  }

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 05/12] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
Posted by Woods, Brian 4 years, 8 months ago
On Thu, Jul 25, 2019 at 01:31:02PM +0000, Jan Beulich wrote:
> This is in preparation of actually enabling x2APIC mode, which requires
> this wider IRTE format to be used.
> 
> A specific remark regarding the first hunk changing
> amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
> i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
> tables when creating new one"). Other code introduced by that change has
> meanwhile disappeared or further changed, and I wonder if - rather than
> adding an x2apic_enabled check to the conditional - the bypass couldn't
> be deleted altogether. For now the goal is to affect the non-x2APIC
> paths as little as possible.
> 
> Take the liberty and use the new "fresh" flag to suppress an unneeded
> flush in update_intremap_entry_from_ioapic().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Brian Woods <brian.woods@amd.com>

> ---
> v4: Re-base. Do away with standalone struct irte_full. Use smp_wmb().
> v3: Avoid unrelated type changes in update_intremap_entry_from_ioapic().
>      Drop irte_mode enum and variable. Convert INTREMAP_TABLE_ORDER into
>      a static helper. Comment barrier() uses. Switch boolean bitfields to
>      bool.
> v2: Add cast in get_full_dest(). Re-base over changes earlier in the
>      series. Don't use cmpxchg16b. Use barrier() instead of wmb().
> ---
> Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
> mode is unavailable in x2APIC mode, but they've confirmed this to be a
> mistake on their part.
> 
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -39,12 +39,36 @@ union irte32 {
>       } flds;
>   };
>   
> +union irte128 {
> +    uint64_t raw[2];
> +    struct {
> +        bool remap_en:1;
> +        bool sup_io_pf:1;
> +        unsigned int int_type:3;
> +        bool rq_eoi:1;
> +        bool dm:1;
> +        bool guest_mode:1; /* MBZ */
> +        unsigned int dest_lo:24;
> +        unsigned int :32;
> +        unsigned int vector:8;
> +        unsigned int :24;
> +        unsigned int :24;
> +        unsigned int dest_hi:8;
> +    } full;
> +};
> +
>   union irte_ptr {
>       void *ptr;
>       union irte32 *ptr32;
> +    union irte128 *ptr128;
>   };
>   
> -#define INTREMAP_TABLE_ORDER    1
> +union irte_cptr {
> +    const void *ptr;
> +    const union irte32 *ptr32;
> +    const union irte128 *ptr128;
> +} __transparent__;
> +
>   #define INTREMAP_LENGTH 0xB
>   #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
>   
> @@ -57,6 +81,13 @@ unsigned int nr_ioapic_sbdf;
>   
>   static void dump_intremap_tables(unsigned char key);
>   
> +static unsigned int __init intremap_table_order(const struct amd_iommu *iommu)
> +{
> +    return iommu->ctrl.ga_en
> +           ? get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte128))
> +           : get_order_from_bytes(INTREMAP_ENTRIES * sizeof(union irte32));
> +}
> +
>   unsigned int ioapic_id_to_index(unsigned int apic_id)
>   {
>       unsigned int idx;
> @@ -131,7 +162,10 @@ static union irte_ptr get_intremap_entry
>   
>       ASSERT(table.ptr && (index < INTREMAP_ENTRIES));
>   
> -    table.ptr32 += index;
> +    if ( iommu->ctrl.ga_en )
> +        table.ptr128 += index;
> +    else
> +        table.ptr32 += index;
>   
>       return table;
>   }
> @@ -141,7 +175,22 @@ static void free_intremap_entry(const st
>   {
>       union irte_ptr entry = get_intremap_entry(iommu, bdf, index);
>   
> -    ACCESS_ONCE(entry.ptr32->raw) = 0;
> +    if ( iommu->ctrl.ga_en )
> +    {
> +        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
> +        /*
> +         * Low half (containing RemapEn) needs to be cleared first.  Note that
> +         * strictly speaking smp_wmb() isn't enough, as conceptually it expands
> +         * to just barrier() when !CONFIG_SMP.  But wmb() would be more than we
> +         * need, since the IOMMU is a cache-coherent entity on the bus.  And
> +         * given that we don't allow CONFIG_SMP to be turned off, the SMP
> +         * variant will do.
> +         */
> +        smp_wmb();
> +        entry.ptr128->raw[1] = 0;
> +    }
> +    else
> +        ACCESS_ONCE(entry.ptr32->raw) = 0;
>   
>       __clear_bit(index, get_ivrs_mappings(iommu->seg)[bdf].intremap_inuse);
>   }
> @@ -151,17 +200,44 @@ static void update_intremap_entry(const
>                                     unsigned int vector, unsigned int int_type,
>                                     unsigned int dest_mode, unsigned int dest)
>   {
> -    union irte32 irte = {
> -        .flds = {
> -            .remap_en = true,
> -            .int_type = int_type,
> -            .dm = dest_mode,
> -            .dest = dest,
> -            .vector = vector,
> -        },
> -    };
> +    if ( iommu->ctrl.ga_en )
> +    {
> +        union irte128 irte = {
> +            .full = {
> +                .remap_en = true,
> +                .int_type = int_type,
> +                .dm = dest_mode,
> +                .dest_lo = dest,
> +                .dest_hi = dest >> 24,
> +                .vector = vector,
> +            },
> +        };
> +
> +        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
> +        /*
> +         * Low half, in particular RemapEn, needs to be cleared first.  See
> +         * comment in free_intremap_entry() regarding the choice of barrier.
> +         */
> +        smp_wmb();
> +        entry.ptr128->raw[1] = irte.raw[1];
> +        /* High half needs to be set before low one (containing RemapEn). */
> +        smp_wmb();
> +        ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
> +    }
> +    else
> +    {
> +        union irte32 irte = {
> +            .flds = {
> +                .remap_en = true,
> +                .int_type = int_type,
> +                .dm = dest_mode,
> +                .dest = dest,
> +                .vector = vector,
> +            },
> +        };
>   
> -    ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
> +        ACCESS_ONCE(entry.ptr32->raw) = irte.raw;
> +    }
>   }
>   
>   static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
> @@ -175,6 +251,11 @@ static inline void set_rte_index(struct
>       rte->delivery_mode = offset >> 8;
>   }
>   
> +static inline unsigned int get_full_dest(const union irte128 *entry)
> +{
> +    return entry->full.dest_lo | ((unsigned int)entry->full.dest_hi << 24);
> +}
> +
>   static int update_intremap_entry_from_ioapic(
>       int bdf,
>       struct amd_iommu *iommu,
> @@ -184,10 +265,11 @@ static int update_intremap_entry_from_io
>   {
>       unsigned long flags;
>       union irte_ptr entry;
> -    u8 delivery_mode, dest, vector, dest_mode;
> +    uint8_t delivery_mode, vector, dest_mode;
>       int req_id;
>       spinlock_t *lock;
> -    unsigned int offset;
> +    unsigned int dest, offset;
> +    bool fresh = false;
>   
>       req_id = get_intremap_requestor_id(iommu->seg, bdf);
>       lock = get_intremap_lock(iommu->seg, req_id);
> @@ -195,7 +277,7 @@ static int update_intremap_entry_from_io
>       delivery_mode = rte->delivery_mode;
>       vector = rte->vector;
>       dest_mode = rte->dest_mode;
> -    dest = rte->dest.logical.logical_dest;
> +    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
>   
>       spin_lock_irqsave(lock, flags);
>   
> @@ -210,25 +292,40 @@ static int update_intremap_entry_from_io
>               return -ENOSPC;
>           }
>           *index = offset;
> -        lo_update = 1;
> +        fresh = true;
>       }
>   
>       entry = get_intremap_entry(iommu, req_id, offset);
> -    if ( !lo_update )
> +    if ( fresh )
> +        /* nothing */;
> +    else if ( !lo_update )
>       {
>           /*
>            * Low half of incoming RTE is already in remapped format,
>            * so need to recover vector and delivery mode from IRTE.
>            */
>           ASSERT(get_rte_index(rte) == offset);
> -        vector = entry.ptr32->flds.vector;
> +        if ( iommu->ctrl.ga_en )
> +            vector = entry.ptr128->full.vector;
> +        else
> +            vector = entry.ptr32->flds.vector;
> +        /* The IntType fields match for both formats. */
>           delivery_mode = entry.ptr32->flds.int_type;
>       }
> +    else if ( x2apic_enabled )
> +    {
> +        /*
> +         * High half of incoming RTE was read from the I/O APIC and hence may
> +         * not hold the full destination, so need to recover full destination
> +         * from IRTE.
> +         */
> +        dest = get_full_dest(entry.ptr128);
> +    }
>       update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
>   
>       spin_unlock_irqrestore(lock, flags);
>   
> -    if ( iommu->enabled )
> +    if ( iommu->enabled && !fresh )
>       {
>           spin_lock_irqsave(&iommu->lock, flags);
>           amd_iommu_flush_intremap(iommu, req_id);
> @@ -286,6 +383,18 @@ int __init amd_iommu_setup_ioapic_remapp
>               dest_mode = rte.dest_mode;
>               dest = rte.dest.logical.logical_dest;
>   
> +            if ( iommu->ctrl.xt_en )
> +            {
> +                /*
> +                 * In x2APIC mode we have no way of discovering the high 24
> +                 * bits of the destination of an already enabled interrupt.
> +                 * We come here earlier than for xAPIC mode, so no interrupts
> +                 * should have been set up before.
> +                 */
> +                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
> +                                IO_APIC_ID(apic), pin);
> +            }
> +
>               spin_lock_irqsave(lock, flags);
>               offset = alloc_intremap_entry(seg, req_id, 1);
>               BUG_ON(offset >= INTREMAP_ENTRIES);
> @@ -320,7 +429,8 @@ void amd_iommu_ioapic_update_ire(
>       struct IO_APIC_route_entry new_rte = { 0 };
>       unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
>       unsigned int pin = (reg - 0x10) / 2;
> -    int saved_mask, seg, bdf, rc;
> +    int seg, bdf, rc;
> +    bool saved_mask, fresh = false;
>       struct amd_iommu *iommu;
>       unsigned int idx;
>   
> @@ -362,12 +472,22 @@ void amd_iommu_ioapic_update_ire(
>           *(((u32 *)&new_rte) + 1) = value;
>       }
>   
> -    if ( new_rte.mask &&
> -         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
> +    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
>       {
>           ASSERT(saved_mask);
> -        __io_apic_write(apic, reg, value);
> -        return;
> +
> +        /*
> +         * There's nowhere except the IRTE to store a full 32-bit destination,
> +         * so we may not bypass entry allocation and updating of the low RTE
> +         * half in the (usual) case of the high RTE half getting written first.
> +         */
> +        if ( new_rte.mask && !x2apic_enabled )
> +        {
> +            __io_apic_write(apic, reg, value);
> +            return;
> +        }
> +
> +        fresh = true;
>       }
>   
>       /* mask the interrupt while we change the intremap table */
> @@ -396,8 +516,12 @@ void amd_iommu_ioapic_update_ire(
>       if ( reg == rte_lo )
>           return;
>   
> -    /* unmask the interrupt after we have updated the intremap table */
> -    if ( !saved_mask )
> +    /*
> +     * Unmask the interrupt after we have updated the intremap table. Also
> +     * write the low half if a fresh entry was allocated for a high half
> +     * update in x2APIC mode.
> +     */
> +    if ( !saved_mask || (x2apic_enabled && fresh) )
>       {
>           old_rte.mask = saved_mask;
>           __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
> @@ -411,31 +535,40 @@ unsigned int amd_iommu_read_ioapic_from_
>       unsigned int offset;
>       unsigned int val = __io_apic_read(apic, reg);
>       unsigned int pin = (reg - 0x10) / 2;
> +    uint16_t seg, bdf, req_id;
> +    const struct amd_iommu *iommu;
> +    union irte_ptr entry;
>   
>       idx = ioapic_id_to_index(IO_APIC_ID(apic));
>       if ( idx == MAX_IO_APICS )
>           return val;
>   
>       offset = ioapic_sbdf[idx].pin_2_idx[pin];
> +    if ( offset >= INTREMAP_ENTRIES )
> +        return val;
>   
> -    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
> -    {
> -        u16 bdf = ioapic_sbdf[idx].bdf;
> -        u16 seg = ioapic_sbdf[idx].seg;
> -        u16 req_id = get_intremap_requestor_id(seg, bdf);
> -        const struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
> -        union irte_ptr entry;
> +    seg = ioapic_sbdf[idx].seg;
> +    bdf = ioapic_sbdf[idx].bdf;
> +    iommu = find_iommu_for_device(seg, bdf);
> +    if ( !iommu )
> +        return val;
> +    req_id = get_intremap_requestor_id(seg, bdf);
> +    entry = get_intremap_entry(iommu, req_id, offset);
>   
> -        if ( !iommu )
> -            return val;
> +    if ( !(reg & 1) )
> +    {
>           ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
> -        entry = get_intremap_entry(iommu, req_id, offset);
>           val &= ~(INTREMAP_ENTRIES - 1);
> +        /* The IntType fields match for both formats. */
>           val |= MASK_INSR(entry.ptr32->flds.int_type,
>                            IO_APIC_REDIR_DELIV_MODE_MASK);
> -        val |= MASK_INSR(entry.ptr32->flds.vector,
> +        val |= MASK_INSR(iommu->ctrl.ga_en
> +                         ? entry.ptr128->full.vector
> +                         : entry.ptr32->flds.vector,
>                            IO_APIC_REDIR_VECTOR_MASK);
>       }
> +    else if ( x2apic_enabled )
> +        val = get_full_dest(entry.ptr128);
>   
>       return val;
>   }
> @@ -447,9 +580,9 @@ static int update_intremap_entry_from_ms
>       unsigned long flags;
>       union irte_ptr entry;
>       u16 req_id, alias_id;
> -    u8 delivery_mode, dest, vector, dest_mode;
> +    uint8_t delivery_mode, vector, dest_mode;
>       spinlock_t *lock;
> -    unsigned int offset, i;
> +    unsigned int dest, offset, i;
>   
>       req_id = get_dma_requestor_id(iommu->seg, bdf);
>       alias_id = get_intremap_requestor_id(iommu->seg, bdf);
> @@ -470,7 +603,12 @@ static int update_intremap_entry_from_ms
>       dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
>       delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
>       vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
> -    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
> +
> +    if ( x2apic_enabled )
> +        dest = msg->dest32;
> +    else
> +        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
> +
>       offset = *remap_index;
>       if ( offset >= INTREMAP_ENTRIES )
>       {
> @@ -616,10 +754,21 @@ void amd_iommu_read_msi_from_ire(
>       }
>   
>       msg->data &= ~(INTREMAP_ENTRIES - 1);
> +    /* The IntType fields match for both formats. */
>       msg->data |= MASK_INSR(entry.ptr32->flds.int_type,
>                              MSI_DATA_DELIVERY_MODE_MASK);
> -    msg->data |= MASK_INSR(entry.ptr32->flds.vector,
> -                           MSI_DATA_VECTOR_MASK);
> +    if ( iommu->ctrl.ga_en )
> +    {
> +        msg->data |= MASK_INSR(entry.ptr128->full.vector,
> +                               MSI_DATA_VECTOR_MASK);
> +        msg->dest32 = get_full_dest(entry.ptr128);
> +    }
> +    else
> +    {
> +        msg->data |= MASK_INSR(entry.ptr32->flds.vector,
> +                               MSI_DATA_VECTOR_MASK);
> +        msg->dest32 = entry.ptr32->flds.dest;
> +    }
>   }
>   
>   int __init amd_iommu_free_intremap_table(
> @@ -631,7 +780,7 @@ int __init amd_iommu_free_intremap_table
>   
>       if ( tb )
>       {
> -        __free_amd_iommu_tables(tb, INTREMAP_TABLE_ORDER);
> +        __free_amd_iommu_tables(tb, intremap_table_order(iommu));
>           ivrs_mapping->intremap_table = NULL;
>       }
>   
> @@ -641,10 +790,10 @@ int __init amd_iommu_free_intremap_table
>   void *__init amd_iommu_alloc_intremap_table(
>       const struct amd_iommu *iommu, unsigned long **inuse_map)
>   {
> -    void *tb;
> -    tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
> +    void *tb = __alloc_amd_iommu_tables(intremap_table_order(iommu));
> +
>       BUG_ON(tb == NULL);
> -    memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
> +    memset(tb, 0, PAGE_SIZE << intremap_table_order(iommu));
>       *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
>       BUG_ON(*inuse_map == NULL);
>       return tb;
> @@ -685,18 +834,29 @@ int __init amd_setup_hpet_msi(struct msi
>       return rc;
>   }
>   
> -static void dump_intremap_table(const u32 *table)
> +static void dump_intremap_table(const struct amd_iommu *iommu,
> +                                union irte_cptr tbl)
>   {
> -    u32 count;
> +    unsigned int count;
>   
> -    if ( !table )
> +    if ( !tbl.ptr )
>           return;
>   
>       for ( count = 0; count < INTREMAP_ENTRIES; count++ )
>       {
> -        if ( !table[count] )
> -            continue;
> -        printk("    IRTE[%03x] %08x\n", count, table[count]);
> +        if ( iommu->ctrl.ga_en )
> +        {
> +            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
> +                continue;
> +            printk("    IRTE[%03x] %016lx_%016lx\n",
> +                   count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
> +        }
> +        else
> +        {
> +            if ( !tbl.ptr32[count].raw )
> +                continue;
> +            printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
> +        }
>       }
>   }
>   
> @@ -714,7 +874,7 @@ static int dump_intremap_mapping(const s
>              PCI_FUNC(ivrs_mapping->dte_requestor_id));
>   
>       spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
> -    dump_intremap_table(ivrs_mapping->intremap_table);
> +    dump_intremap_table(iommu, ivrs_mapping->intremap_table);
>       spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
>   
>       process_pending_softirqs();
> @@ -733,6 +893,8 @@ static void dump_intremap_tables(unsigne
>       printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
>   
>       spin_lock_irqsave(&shared_intremap_lock, flags);
> -    dump_intremap_table(shared_intremap_table);
> +    dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
> +                                         list),
> +                        shared_intremap_table);
>       spin_unlock_irqrestore(&shared_intremap_lock, flags);
>   }
> 

-- 
Brian Woods

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 05/12] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
Posted by Andrew Cooper 4 years, 8 months ago
On 25/07/2019 14:31, Jan Beulich wrote:
> This is in preparation of actually enabling x2APIC mode, which requires
> this wider IRTE format to be used.
>
> A specific remark regarding the first hunk changing
> amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
> i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
> tables when creating new one"). Other code introduced by that change has
> meanwhile disappeared or further changed, and I wonder if - rather than
> adding an x2apic_enabled check to the conditional - the bypass couldn't
> be deleted altogether. For now the goal is to affect the non-x2APIC
> paths as little as possible.
>
> Take the liberty and use the new "fresh" flag to suppress an unneeded
> flush in update_intremap_entry_from_ioapic().
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 06/12] AMD/IOMMU: split amd_iommu_init_one()
Posted by Jan Beulich 4 years, 8 months ago
Mapping the MMIO space and obtaining feature information need to happen
slightly earlier, such that for x2APIC support we can set XTEn prior to
calling amd_iommu_update_ivrs_mapping_acpi() and
amd_iommu_setup_ioapic_remapping().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -969,14 +969,6 @@ static void * __init allocate_ppr_log(st
  
  static int __init amd_iommu_init_one(struct amd_iommu *iommu)
  {
-    if ( map_iommu_mmio_region(iommu) != 0 )
-        goto error_out;
-
-    get_iommu_features(iommu);
-
-    if ( iommu->features.raw )
-        iommuv2_enabled = 1;
-
      if ( allocate_cmd_buffer(iommu) == NULL )
          goto error_out;
  
@@ -1201,6 +1193,23 @@ static bool_t __init amd_sp5100_erratum2
      return 0;
  }
  
+static int __init amd_iommu_prepare_one(struct amd_iommu *iommu)
+{
+    int rc = alloc_ivrs_mappings(iommu->seg);
+
+    if ( !rc )
+        rc = map_iommu_mmio_region(iommu);
+    if ( rc )
+        return rc;
+
+    get_iommu_features(iommu);
+
+    if ( iommu->features.raw )
+        iommuv2_enabled = true;
+
+    return 0;
+}
+
  int __init amd_iommu_init(void)
  {
      struct amd_iommu *iommu;
@@ -1231,7 +1240,7 @@ int __init amd_iommu_init(void)
      radix_tree_init(&ivrs_maps);
      for_each_amd_iommu ( iommu )
      {
-        rc = alloc_ivrs_mappings(iommu->seg);
+        rc = amd_iommu_prepare_one(iommu);
          if ( rc )
              goto error_out;
      }

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 07/12] AMD/IOMMU: allow enabling with IRQ not yet set up
Posted by Jan Beulich 4 years, 8 months ago
Early enabling (to enter x2APIC mode) requires deferring the IRQ
setup. Code to actually do that setup in the x2APIC case will get added
subsequently.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v3: Re-base.

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -813,7 +813,6 @@ static void amd_iommu_erratum_746_workar
  static void enable_iommu(struct amd_iommu *iommu)
  {
      unsigned long flags;
-    struct irq_desc *desc;
  
      spin_lock_irqsave(&iommu->lock, flags);
  
@@ -833,19 +832,27 @@ static void enable_iommu(struct amd_iomm
      if ( iommu->features.flds.ppr_sup )
          register_iommu_ppr_log_in_mmio_space(iommu);
  
-    desc = irq_to_desc(iommu->msi.irq);
-    spin_lock(&desc->lock);
-    set_msi_affinity(desc, NULL);
-    spin_unlock(&desc->lock);
+    if ( iommu->msi.irq > 0 )
+    {
+        struct irq_desc *desc = irq_to_desc(iommu->msi.irq);
+
+        spin_lock(&desc->lock);
+        set_msi_affinity(desc, NULL);
+        spin_unlock(&desc->lock);
+    }
  
      amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
  
      set_iommu_ht_flags(iommu);
      set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
-    set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
  
-    if ( iommu->features.flds.ppr_sup )
-        set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    if ( iommu->msi.irq > 0 )
+    {
+        set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
+
+        if ( iommu->features.flds.ppr_sup )
+            set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    }
  
      if ( iommu->features.flds.gt_sup )
          set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED);

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 08/12] AMD/IOMMU: adjust setup of internal interrupt for x2APIC mode
Posted by Jan Beulich 4 years, 8 months ago
In order to be able to express all possible destinations we need to make
use of this non-MSI-capability based mechanism. The new IRQ controller
structure can re-use certain MSI functions, though.

For now general and PPR interrupts still share a single vector, IRQ, and
hence handler.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v4: Re-base.
v3: Re-base.

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -472,6 +472,44 @@ static hw_irq_controller iommu_maskable_
      .set_affinity = set_msi_affinity,
  };
  
+static void set_x2apic_affinity(struct irq_desc *desc, const cpumask_t *mask)
+{
+    struct amd_iommu *iommu = desc->action->dev_id;
+    unsigned int dest = set_desc_affinity(desc, mask);
+    union amd_iommu_x2apic_control ctrl = {};
+    unsigned long flags;
+
+    if ( dest == BAD_APICID )
+        return;
+
+    msi_compose_msg(desc->arch.vector, NULL, &iommu->msi.msg);
+    iommu->msi.msg.dest32 = dest;
+
+    ctrl.dest_mode = MASK_EXTR(iommu->msi.msg.address_lo,
+                               MSI_ADDR_DESTMODE_MASK);
+    ctrl.int_type = MASK_EXTR(iommu->msi.msg.data,
+                              MSI_DATA_DELIVERY_MODE_MASK);
+    ctrl.vector = desc->arch.vector;
+    ctrl.dest_lo = dest;
+    ctrl.dest_hi = dest >> 24;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    writeq(ctrl.raw, iommu->mmio_base + IOMMU_XT_INT_CTRL_MMIO_OFFSET);
+    writeq(ctrl.raw, iommu->mmio_base + IOMMU_XT_PPR_INT_CTRL_MMIO_OFFSET);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static hw_irq_controller iommu_x2apic_type = {
+    .typename     = "IOMMU-x2APIC",
+    .startup      = irq_startup_none,
+    .shutdown     = irq_shutdown_none,
+    .enable       = irq_enable_none,
+    .disable      = irq_disable_none,
+    .ack          = ack_nonmaskable_msi_irq,
+    .end          = end_nonmaskable_msi_irq,
+    .set_affinity = set_x2apic_affinity,
+};
+
  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
  {
      u16 domain_id, device_id, flags;
@@ -726,8 +764,6 @@ static void iommu_interrupt_handler(int
  static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
  {
      int irq, ret;
-    hw_irq_controller *handler;
-    u16 control;
  
      irq = create_irq(NUMA_NO_NODE);
      if ( irq <= 0 )
@@ -747,19 +783,42 @@ static bool_t __init set_iommu_interrupt
                          PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
          return 0;
      }
-    control = pci_conf_read16(PCI_SBDF2(iommu->seg, iommu->bdf),
-                              iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
-    iommu->msi.msi.nvec = 1;
-    if ( is_mask_bit_support(control) )
-    {
-        iommu->msi.msi_attrib.maskbit = 1;
-        iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
-                                                is_64bit_address(control));
-        handler = &iommu_maskable_msi_type;
+
+    if ( iommu->ctrl.int_cap_xt_en )
+    {
+        struct irq_desc *desc = irq_to_desc(irq);
+
+        iommu->msi.msi_attrib.pos = MSI_TYPE_IOMMU;
+        iommu->msi.msi_attrib.maskbit = 0;
+        iommu->msi.msi_attrib.is_64 = 1;
+
+        desc->msi_desc = &iommu->msi;
+        desc->handler = &iommu_x2apic_type;
+
+        ret = 0;
      }
      else
-        handler = &iommu_msi_type;
-    ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
+    {
+        hw_irq_controller *handler;
+        u16 control;
+
+        control = pci_conf_read16(PCI_SBDF2(iommu->seg, iommu->bdf),
+                                  iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
+
+        iommu->msi.msi.nvec = 1;
+        if ( is_mask_bit_support(control) )
+        {
+            iommu->msi.msi_attrib.maskbit = 1;
+            iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
+                                                    is_64bit_address(control));
+            handler = &iommu_maskable_msi_type;
+        }
+        else
+            handler = &iommu_msi_type;
+
+        ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
+    }
+
      if ( !ret )
          ret = request_irq(irq, 0, iommu_interrupt_handler, "amd_iommu", iommu);
      if ( ret )
@@ -837,8 +896,19 @@ static void enable_iommu(struct amd_iomm
          struct irq_desc *desc = irq_to_desc(iommu->msi.irq);
  
          spin_lock(&desc->lock);
-        set_msi_affinity(desc, NULL);
-        spin_unlock(&desc->lock);
+
+        if ( iommu->ctrl.int_cap_xt_en )
+        {
+            set_x2apic_affinity(desc, NULL);
+            spin_unlock(&desc->lock);
+        }
+        else
+        {
+            set_msi_affinity(desc, NULL);
+            spin_unlock(&desc->lock);
+
+            amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+        }
      }
  
      amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
@@ -878,7 +948,9 @@ static void disable_iommu(struct amd_iom
          return;
      }
  
-    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
+    if ( !iommu->ctrl.int_cap_xt_en )
+        amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
+
      set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED);
      set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED);
  
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -416,6 +416,25 @@ union amd_iommu_ext_features {
      } flds;
  };
  
+/* x2APIC Control Registers */
+#define IOMMU_XT_INT_CTRL_MMIO_OFFSET		0x0170
+#define IOMMU_XT_PPR_INT_CTRL_MMIO_OFFSET	0x0178
+#define IOMMU_XT_GA_INT_CTRL_MMIO_OFFSET	0x0180
+
+union amd_iommu_x2apic_control {
+    uint64_t raw;
+    struct {
+        unsigned int :2;
+        unsigned int dest_mode:1;
+        unsigned int :5;
+        unsigned int dest_lo:24;
+        unsigned int vector:8;
+        unsigned int int_type:1; /* DM in IOMMU spec 3.04 */
+        unsigned int :15;
+        unsigned int dest_hi:8;
+    };
+};
+
  /* Status Register*/
  #define IOMMU_STATUS_MMIO_OFFSET		0x2020
  #define IOMMU_STATUS_EVENT_OVERFLOW_MASK	0x00000001

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 09/12] AMD/IOMMU: enable x2APIC mode when available
Posted by Jan Beulich 4 years, 8 months ago
In order for the CPUs to use x2APIC mode, the IOMMU(s) first need to be
switched into a suitable state.

The post-AP-bringup IRQ affinity adjustment is done also for the non-
x2APIC case, matching what VT-d does.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Brian Woods <brian.woods@amd.com>
---
v4: Re-base.
v3: Set GAEn (and other control register bits) earlier. Also clear the
     bits enabled here in amd_iommu_init_cleanup(). Re-base. Pass NULL
     CPU mask to set_{x2apic,msi}_affinity().
v2: Drop cpu_has_cx16 check. Add comment.
---
TBD: Instead of the system_state check in iov_enable_xt() the function
      could also zap its own hook pointer, at which point it could also
      become __init. This would, however, require that either
      resume_x2apic() be bound to ignore iommu_enable_x2apic() errors
      forever, or that iommu_enable_x2apic() be slightly re-arranged to
      not return -EOPNOTSUPP when finding a NULL hook during resume.

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -833,6 +833,30 @@ static bool_t __init set_iommu_interrupt
      return 1;
  }
  
+int iov_adjust_irq_affinities(void)
+{
+    const struct amd_iommu *iommu;
+
+    if ( !iommu_enabled )
+        return 0;
+
+    for_each_amd_iommu ( iommu )
+    {
+        struct irq_desc *desc = irq_to_desc(iommu->msi.irq);
+        unsigned long flags;
+
+        spin_lock_irqsave(&desc->lock, flags);
+        if ( iommu->ctrl.int_cap_xt_en )
+            set_x2apic_affinity(desc, NULL);
+        else
+            set_msi_affinity(desc, NULL);
+        spin_unlock_irqrestore(&desc->lock, flags);
+    }
+
+    return 0;
+}
+__initcall(iov_adjust_irq_affinities);
+
  /*
   * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
   * Workaround:
@@ -1046,7 +1070,7 @@ static void * __init allocate_ppr_log(st
                                  IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log");
  }
  
-static int __init amd_iommu_init_one(struct amd_iommu *iommu)
+static int __init amd_iommu_init_one(struct amd_iommu *iommu, bool intr)
  {
      if ( allocate_cmd_buffer(iommu) == NULL )
          goto error_out;
@@ -1057,7 +1081,7 @@ static int __init amd_iommu_init_one(str
      if ( iommu->features.flds.ppr_sup && !allocate_ppr_log(iommu) )
          goto error_out;
  
-    if ( !set_iommu_interrupt_handler(iommu) )
+    if ( intr && !set_iommu_interrupt_handler(iommu) )
          goto error_out;
  
      /* To make sure that device_table.buffer has been successfully allocated */
@@ -1086,8 +1110,16 @@ static void __init amd_iommu_init_cleanu
      list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
      {
          list_del(&iommu->list);
+
+        iommu->ctrl.ga_en = 0;
+        iommu->ctrl.xt_en = 0;
+        iommu->ctrl.int_cap_xt_en = 0;
+
          if ( iommu->enabled )
              disable_iommu(iommu);
+        else if ( iommu->mmio_base )
+            writeq(iommu->ctrl.raw,
+                   iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
  
          deallocate_ring_buffer(&iommu->cmd_buffer);
          deallocate_ring_buffer(&iommu->event_log);
@@ -1289,7 +1321,7 @@ static int __init amd_iommu_prepare_one(
      return 0;
  }
  
-int __init amd_iommu_init(void)
+int __init amd_iommu_prepare(bool xt)
  {
      struct amd_iommu *iommu;
      int rc = -ENODEV;
@@ -1304,9 +1336,14 @@ int __init amd_iommu_init(void)
      if ( unlikely(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) )
          goto error_out;
  
+    /* Have we been here before? */
+    if ( ivhd_type )
+        return 0;
+
      rc = amd_iommu_get_supported_ivhd_type();
      if ( rc < 0 )
          goto error_out;
+    BUG_ON(!rc);
      ivhd_type = rc;
  
      rc = amd_iommu_get_ivrs_dev_entries();
@@ -1322,9 +1359,37 @@ int __init amd_iommu_init(void)
          rc = amd_iommu_prepare_one(iommu);
          if ( rc )
              goto error_out;
+
+        rc = -ENODEV;
+        if ( xt && (!iommu->features.flds.ga_sup || !iommu->features.flds.xt_sup) )
+            goto error_out;
+    }
+
+    for_each_amd_iommu ( iommu )
+    {
+        /* NB: There's no need to actually write these out right here. */
+        iommu->ctrl.ga_en |= xt;
+        iommu->ctrl.xt_en = xt;
+        iommu->ctrl.int_cap_xt_en = xt;
      }
  
      rc = amd_iommu_update_ivrs_mapping_acpi();
+
+ error_out:
+    if ( rc )
+    {
+        amd_iommu_init_cleanup();
+        ivhd_type = 0;
+    }
+
+    return rc;
+}
+
+int __init amd_iommu_init(bool xt)
+{
+    struct amd_iommu *iommu;
+    int rc = amd_iommu_prepare(xt);
+
      if ( rc )
          goto error_out;
  
@@ -1350,7 +1415,12 @@ int __init amd_iommu_init(void)
      /* per iommu initialization  */
      for_each_amd_iommu ( iommu )
      {
-        rc = amd_iommu_init_one(iommu);
+        /*
+         * Setting up of the IOMMU interrupts cannot occur yet at the (very
+         * early) time we get here when enabling x2APIC mode. Suppress it
+         * here, and do it explicitly in amd_iommu_init_interrupt().
+         */
+        rc = amd_iommu_init_one(iommu, !xt);
          if ( rc )
              goto error_out;
      }
@@ -1362,6 +1432,40 @@ error_out:
      return rc;
  }
  
+int __init amd_iommu_init_interrupt(void)
+{
+    struct amd_iommu *iommu;
+    int rc = 0;
+
+    for_each_amd_iommu ( iommu )
+    {
+        struct irq_desc *desc;
+
+        if ( !set_iommu_interrupt_handler(iommu) )
+        {
+            rc = -EIO;
+            break;
+        }
+
+        desc = irq_to_desc(iommu->msi.irq);
+
+        spin_lock(&desc->lock);
+        ASSERT(iommu->ctrl.int_cap_xt_en);
+        set_x2apic_affinity(desc, &cpu_online_map);
+        spin_unlock(&desc->lock);
+
+        set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
+
+        if ( iommu->features.flds.ppr_sup )
+            set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    }
+
+    if ( rc )
+        amd_iommu_init_cleanup();
+
+    return rc;
+}
+
  static void invalidate_all_domain_pages(void)
  {
      struct domain *d;
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -799,6 +799,35 @@ void *__init amd_iommu_alloc_intremap_ta
      return tb;
  }
  
+bool __init iov_supports_xt(void)
+{
+    unsigned int apic;
+
+    if ( !iommu_enable || !iommu_intremap )
+        return false;
+
+    if ( amd_iommu_prepare(true) )
+        return false;
+
+    for ( apic = 0; apic < nr_ioapics; apic++ )
+    {
+        unsigned int idx = ioapic_id_to_index(IO_APIC_ID(apic));
+
+        if ( idx == MAX_IO_APICS )
+            return false;
+
+        if ( !find_iommu_for_device(ioapic_sbdf[idx].seg,
+                                    ioapic_sbdf[idx].bdf) )
+        {
+            AMD_IOMMU_DEBUG("No IOMMU for IO-APIC %#x (ID %x)\n",
+                            apic, IO_APIC_ID(apic));
+            return false;
+        }
+    }
+
+    return true;
+}
+
  int __init amd_setup_hpet_msi(struct msi_desc *msi_desc)
  {
      spinlock_t *lock;
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -170,7 +170,8 @@ static int __init iov_detect(void)
      if ( !iommu_enable && !iommu_intremap )
          return 0;
  
-    if ( amd_iommu_init() != 0 )
+    else if ( (init_done ? amd_iommu_init_interrupt()
+                         : amd_iommu_init(false)) != 0 )
      {
          printk("AMD-Vi: Error initialization\n");
          return -ENODEV;
@@ -184,6 +185,25 @@ static int __init iov_detect(void)
      return 0;
  }
  
+static int iov_enable_xt(void)
+{
+    int rc;
+
+    if ( system_state >= SYS_STATE_active )
+        return 0;
+
+    if ( (rc = amd_iommu_init(true)) != 0 )
+    {
+        printk("AMD-Vi: Error %d initializing for x2APIC mode\n", rc);
+        /* -ENXIO has special meaning to the caller - convert it. */
+        return rc != -ENXIO ? rc : -ENODATA;
+    }
+
+    init_done = true;
+
+    return 0;
+}
+
  int amd_iommu_alloc_root(struct domain_iommu *hd)
  {
      if ( unlikely(!hd->arch.root_table) )
@@ -557,11 +577,13 @@ static const struct iommu_ops __initcons
      .free_page_table = deallocate_page_table,
      .reassign_device = reassign_device,
      .get_device_group_id = amd_iommu_group_id,
+    .enable_x2apic = iov_enable_xt,
      .update_ire_from_apic = amd_iommu_ioapic_update_ire,
      .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
      .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
      .read_msi_from_ire = amd_iommu_read_msi_from_ire,
      .setup_hpet_msi = amd_setup_hpet_msi,
+    .adjust_irq_affinities = iov_adjust_irq_affinities,
      .suspend = amd_iommu_suspend,
      .resume = amd_iommu_resume,
      .crash_shutdown = amd_iommu_crash_shutdown,
@@ -571,4 +593,5 @@ static const struct iommu_ops __initcons
  static const struct iommu_init_ops __initconstrel _iommu_init_ops = {
      .ops = &_iommu_ops,
      .setup = iov_detect,
+    .supports_x2apic = iov_supports_xt,
  };
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -48,8 +48,11 @@ int amd_iommu_detect_acpi(void);
  void get_iommu_features(struct amd_iommu *iommu);
  
  /* amd-iommu-init functions */
-int amd_iommu_init(void);
+int amd_iommu_prepare(bool xt);
+int amd_iommu_init(bool xt);
+int amd_iommu_init_interrupt(void);
  int amd_iommu_update_ivrs_mapping_acpi(void);
+int iov_adjust_irq_affinities(void);
  
  /* mapping functions */
  int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
@@ -93,6 +96,7 @@ void amd_iommu_flush_all_caches(struct a
  struct amd_iommu *find_iommu_for_device(int seg, int bdf);
  
  /* interrupt remapping */
+bool iov_supports_xt(void);
  int amd_iommu_setup_ioapic_remapping(void);
  void *amd_iommu_alloc_intremap_table(
      const struct amd_iommu *, unsigned long **);

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 10/12] AMD/IOMMU: correct IRTE updating
Posted by Jan Beulich 4 years, 8 months ago
Flushing didn't get done along the lines of what the specification says.
Mark entries to be updated as not remapped (which will result in
interrupt requests getting target aborted, but the interrupts should be
masked anyway at that point in time), issue the flush, and only then
write the new entry.

In update_intremap_entry_from_msi_msg() also fold the duplicate initial
lock determination and acquisition into just a single instance.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
RFC: Putting the flush invocations in loops isn't overly nice, but I
      don't think this can really be abused, since callers up the stack
      hold further locks. Nevertheless I'd like to ask for better
      suggestions.
---
v4: Re-base.
v3: Remove stale parts of description. Re-base.
v2: Parts morphed into earlier patch.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -213,15 +213,13 @@ static void update_intremap_entry(const
              },
          };
  
-        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
+        ASSERT(!entry.ptr128->full.remap_en);
+        entry.ptr128->raw[1] = irte.raw[1];
          /*
-         * Low half, in particular RemapEn, needs to be cleared first.  See
+         * High half needs to be set before low one (containing RemapEn).  See
           * comment in free_intremap_entry() regarding the choice of barrier.
           */
          smp_wmb();
-        entry.ptr128->raw[1] = irte.raw[1];
-        /* High half needs to be set before low one (containing RemapEn). */
-        smp_wmb();
          ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
      }
      else
@@ -296,6 +294,20 @@ static int update_intremap_entry_from_io
      }
  
      entry = get_intremap_entry(iommu, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.ptr32->flds.remap_en )
+    {
+        entry.ptr32->flds.remap_en = false;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
      if ( fresh )
          /* nothing */;
      else if ( !lo_update )
@@ -325,13 +337,6 @@ static int update_intremap_entry_from_io
  
      spin_unlock_irqrestore(lock, flags);
  
-    if ( iommu->enabled && !fresh )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
      set_rte_index(rte, offset);
  
      return 0;
@@ -587,19 +592,27 @@ static int update_intremap_entry_from_ms
      req_id = get_dma_requestor_id(iommu->seg, bdf);
      alias_id = get_intremap_requestor_id(iommu->seg, bdf);
  
+    lock = get_intremap_lock(iommu->seg, req_id);
+    spin_lock_irqsave(lock, flags);
+
      if ( msg == NULL )
      {
-        lock = get_intremap_lock(iommu->seg, req_id);
-        spin_lock_irqsave(lock, flags);
          for ( i = 0; i < nr; ++i )
              free_intremap_entry(iommu, req_id, *remap_index + i);
          spin_unlock_irqrestore(lock, flags);
-        goto done;
-    }
  
-    lock = get_intremap_lock(iommu->seg, req_id);
+        if ( iommu->enabled )
+        {
+            spin_lock_irqsave(&iommu->lock, flags);
+            amd_iommu_flush_intremap(iommu, req_id);
+            if ( alias_id != req_id )
+                amd_iommu_flush_intremap(iommu, alias_id);
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+
+        return 0;
+    }
  
-    spin_lock_irqsave(lock, flags);
      dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
      delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
      vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
@@ -623,6 +636,22 @@ static int update_intremap_entry_from_ms
      }
  
      entry = get_intremap_entry(iommu, req_id, offset);
+
+    /* The RemapEn fields match for all formats. */
+    while ( iommu->enabled && entry.ptr32->flds.remap_en )
+    {
+        entry.ptr32->flds.remap_en = false;
+        spin_unlock(lock);
+
+        spin_lock(&iommu->lock);
+        amd_iommu_flush_intremap(iommu, req_id);
+        if ( alias_id != req_id )
+            amd_iommu_flush_intremap(iommu, alias_id);
+        spin_unlock(&iommu->lock);
+
+        spin_lock(lock);
+    }
+
      update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
      spin_unlock_irqrestore(lock, flags);
  
@@ -642,16 +671,6 @@ static int update_intremap_entry_from_ms
                 get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
      }
  
-done:
-    if ( iommu->enabled )
-    {
-        spin_lock_irqsave(&iommu->lock, flags);
-        amd_iommu_flush_intremap(iommu, req_id);
-        if ( alias_id != req_id )
-            amd_iommu_flush_intremap(iommu, alias_id);
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
      return 0;
  }
  

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 10/12] AMD/IOMMU: correct IRTE updating
Posted by Woods, Brian 4 years, 8 months ago
On Thu, Jul 25, 2019 at 01:33:02PM +0000, Jan Beulich wrote:
> Flushing didn't get done along the lines of what the specification says.
> Mark entries to be updated as not remapped (which will result in
> interrupt requests to get target aborted, but the interrupts should be
> masked anyway at that point in time), issue the flush, and only then
> write the new entry.
> 
> In update_intremap_entry_from_msi_msg() also fold the duplicate initial
> lock determination and acquire into just a single instance.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Brian Woods <brian.woods@amd.com>

> ---
> RFC: Putting the flush invocations in loops isn't overly nice, but I
>       don't think this can really be abused, since callers up the stack
>       hold further locks. Nevertheless I'd like to ask for better
>       suggestions.
> ---
> v4: Re-base.
> v3: Remove stale parts of description. Re-base.
> v2: Parts morphed into earlier patch.
> 
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -213,15 +213,13 @@ static void update_intremap_entry(const
>               },
>           };
>   
> -        ACCESS_ONCE(entry.ptr128->raw[0]) = 0;
> +        ASSERT(!entry.ptr128->full.remap_en);
> +        entry.ptr128->raw[1] = irte.raw[1];
>           /*
> -         * Low half, in particular RemapEn, needs to be cleared first.  See
> +         * High half needs to be set before low one (containing RemapEn).  See
>            * comment in free_intremap_entry() regarding the choice of barrier.
>            */
>           smp_wmb();
> -        entry.ptr128->raw[1] = irte.raw[1];
> -        /* High half needs to be set before low one (containing RemapEn). */
> -        smp_wmb();
>           ACCESS_ONCE(entry.ptr128->raw[0]) = irte.raw[0];
>       }
>       else
> @@ -296,6 +294,20 @@ static int update_intremap_entry_from_io
>       }
>   
>       entry = get_intremap_entry(iommu, req_id, offset);
> +
> +    /* The RemapEn fields match for all formats. */
> +    while ( iommu->enabled && entry.ptr32->flds.remap_en )
> +    {
> +        entry.ptr32->flds.remap_en = false;
> +        spin_unlock(lock);
> +
> +        spin_lock(&iommu->lock);
> +        amd_iommu_flush_intremap(iommu, req_id);
> +        spin_unlock(&iommu->lock);
> +
> +        spin_lock(lock);
> +    }
> +
>       if ( fresh )
>           /* nothing */;
>       else if ( !lo_update )
> @@ -325,13 +337,6 @@ static int update_intremap_entry_from_io
>   
>       spin_unlock_irqrestore(lock, flags);
>   
> -    if ( iommu->enabled && !fresh )
> -    {
> -        spin_lock_irqsave(&iommu->lock, flags);
> -        amd_iommu_flush_intremap(iommu, req_id);
> -        spin_unlock_irqrestore(&iommu->lock, flags);
> -    }
> -
>       set_rte_index(rte, offset);
>   
>       return 0;
> @@ -587,19 +592,27 @@ static int update_intremap_entry_from_ms
>       req_id = get_dma_requestor_id(iommu->seg, bdf);
>       alias_id = get_intremap_requestor_id(iommu->seg, bdf);
>   
> +    lock = get_intremap_lock(iommu->seg, req_id);
> +    spin_lock_irqsave(lock, flags);
> +
>       if ( msg == NULL )
>       {
> -        lock = get_intremap_lock(iommu->seg, req_id);
> -        spin_lock_irqsave(lock, flags);
>           for ( i = 0; i < nr; ++i )
>               free_intremap_entry(iommu, req_id, *remap_index + i);
>           spin_unlock_irqrestore(lock, flags);
> -        goto done;
> -    }
>   
> -    lock = get_intremap_lock(iommu->seg, req_id);
> +        if ( iommu->enabled )
> +        {
> +            spin_lock_irqsave(&iommu->lock, flags);
> +            amd_iommu_flush_intremap(iommu, req_id);
> +            if ( alias_id != req_id )
> +                amd_iommu_flush_intremap(iommu, alias_id);
> +            spin_unlock_irqrestore(&iommu->lock, flags);
> +        }
> +
> +        return 0;
> +    }
>   
> -    spin_lock_irqsave(lock, flags);
>       dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
>       delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
>       vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
> @@ -623,6 +636,22 @@ static int update_intremap_entry_from_ms
>       }
>   
>       entry = get_intremap_entry(iommu, req_id, offset);
> +
> +    /* The RemapEn fields match for all formats. */
> +    while ( iommu->enabled && entry.ptr32->flds.remap_en )
> +    {
> +        entry.ptr32->flds.remap_en = false;
> +        spin_unlock(lock);
> +
> +        spin_lock(&iommu->lock);
> +        amd_iommu_flush_intremap(iommu, req_id);
> +        if ( alias_id != req_id )
> +            amd_iommu_flush_intremap(iommu, alias_id);
> +        spin_unlock(&iommu->lock);
> +
> +        spin_lock(lock);
> +    }
> +
>       update_intremap_entry(iommu, entry, vector, delivery_mode, dest_mode, dest);
>       spin_unlock_irqrestore(lock, flags);
>   
> @@ -642,16 +671,6 @@ static int update_intremap_entry_from_ms
>                  get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
>       }
>   
> -done:
> -    if ( iommu->enabled )
> -    {
> -        spin_lock_irqsave(&iommu->lock, flags);
> -        amd_iommu_flush_intremap(iommu, req_id);
> -        if ( alias_id != req_id )
> -            amd_iommu_flush_intremap(iommu, alias_id);
> -        spin_unlock_irqrestore(&iommu->lock, flags);
> -    }
> -
>       return 0;
>   }
>   
> 

-- 
Brian Woods

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 11/12] AMD/IOMMU: don't needlessly log headers when dumping IRTs
Posted by Jan Beulich 4 years, 8 months ago
Log SBDF headers only when there are actual IRTEs to log. This is
particularly important for the total volume of output when the ACPI
tables describe far more than just the existing devices. On my Rome
system so far there was one line for every function of every device on
all 256 buses of segment 0, with extremely few exceptions (like the
IOMMUs themselves).

Also only log one of the "per-device" or "shared" overall headers.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -883,7 +883,8 @@ int __init amd_setup_hpet_msi(struct msi
  }
  
  static void dump_intremap_table(const struct amd_iommu *iommu,
-                                union irte_cptr tbl)
+                                union irte_cptr tbl,
+                                const struct ivrs_mappings *ivrs_mapping)
  {
      unsigned int count;
  
@@ -892,19 +893,25 @@ static void dump_intremap_table(const st
  
      for ( count = 0; count < INTREMAP_ENTRIES; count++ )
      {
-        if ( iommu->ctrl.ga_en )
-        {
-            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
+        if ( iommu->ctrl.ga_en
+             ? !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1]
+             : !tbl.ptr32[count].raw )
                  continue;
+
+        if ( ivrs_mapping )
+        {
+            printk("  %04x:%02x:%02x:%u:\n", iommu->seg,
+                   PCI_BUS(ivrs_mapping->dte_requestor_id),
+                   PCI_SLOT(ivrs_mapping->dte_requestor_id),
+                   PCI_FUNC(ivrs_mapping->dte_requestor_id));
+            ivrs_mapping = NULL;
+        }
+
+        if ( iommu->ctrl.ga_en )
              printk("    IRTE[%03x] %016lx_%016lx\n",
                     count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
-        }
          else
-        {
-            if ( !tbl.ptr32[count].raw )
-                continue;
              printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
-        }
      }
  }
  
@@ -916,13 +923,8 @@ static int dump_intremap_mapping(const s
      if ( !ivrs_mapping )
          return 0;
  
-    printk("  %04x:%02x:%02x:%u:\n", iommu->seg,
-           PCI_BUS(ivrs_mapping->dte_requestor_id),
-           PCI_SLOT(ivrs_mapping->dte_requestor_id),
-           PCI_FUNC(ivrs_mapping->dte_requestor_id));
-
      spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
-    dump_intremap_table(iommu, ivrs_mapping->intremap_table);
+    dump_intremap_table(iommu, ivrs_mapping->intremap_table, ivrs_mapping);
      spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
  
      process_pending_softirqs();
@@ -932,17 +934,22 @@ static int dump_intremap_mapping(const s
  
  static void dump_intremap_tables(unsigned char key)
  {
-    unsigned long flags;
-
-    printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
+    if ( !shared_intremap_table )
+    {
+        printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
  
-    iterate_ivrs_entries(dump_intremap_mapping);
+        iterate_ivrs_entries(dump_intremap_mapping);
+    }
+    else
+    {
+        unsigned long flags;
  
-    printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
+        printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
  
-    spin_lock_irqsave(&shared_intremap_lock, flags);
-    dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
-                                         list),
-                        shared_intremap_table);
-    spin_unlock_irqrestore(&shared_intremap_lock, flags);
+        spin_lock_irqsave(&shared_intremap_lock, flags);
+        dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
+                                             list),
+                            shared_intremap_table, NULL);
+        spin_unlock_irqrestore(&shared_intremap_lock, flags);
+    }
  }

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 11/12] AMD/IOMMU: don't needlessly log headers when dumping IRTs
Posted by Woods, Brian 4 years, 8 months ago
On Thu, Jul 25, 2019 at 01:33:24PM +0000, Jan Beulich wrote:
> Log SBDF headers only when there are actual IRTEs to log. This is
> particularly important for the total volume of output when the ACPI
> tables describe far more than just the existing devices. On my Rome
> system so far there was one line for every function of every device on
> all 256 buses of segment 0, with extremely few exceptions (like the
> IOMMUs themselves).
> 
> Also only log one of the "per-device" or "shared" overall headers.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Brian Woods <brian.woods@amd.com>

> ---
> v4: New.
> 
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -883,7 +883,8 @@ int __init amd_setup_hpet_msi(struct msi
>   }
>   
>   static void dump_intremap_table(const struct amd_iommu *iommu,
> -                                union irte_cptr tbl)
> +                                union irte_cptr tbl,
> +                                const struct ivrs_mappings *ivrs_mapping)
>   {
>       unsigned int count;
>   
> @@ -892,19 +893,25 @@ static void dump_intremap_table(const st
>   
>       for ( count = 0; count < INTREMAP_ENTRIES; count++ )
>       {
> -        if ( iommu->ctrl.ga_en )
> -        {
> -            if ( !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1] )
> +        if ( iommu->ctrl.ga_en
> +             ? !tbl.ptr128[count].raw[0] && !tbl.ptr128[count].raw[1]
> +             : !tbl.ptr32[count].raw )
>                   continue;
> +
> +        if ( ivrs_mapping )
> +        {
> +            printk("  %04x:%02x:%02x:%u:\n", iommu->seg,
> +                   PCI_BUS(ivrs_mapping->dte_requestor_id),
> +                   PCI_SLOT(ivrs_mapping->dte_requestor_id),
> +                   PCI_FUNC(ivrs_mapping->dte_requestor_id));
> +            ivrs_mapping = NULL;
> +        }
> +
> +        if ( iommu->ctrl.ga_en )
>               printk("    IRTE[%03x] %016lx_%016lx\n",
>                      count, tbl.ptr128[count].raw[1], tbl.ptr128[count].raw[0]);
> -        }
>           else
> -        {
> -            if ( !tbl.ptr32[count].raw )
> -                continue;
>               printk("    IRTE[%03x] %08x\n", count, tbl.ptr32[count].raw);
> -        }
>       }
>   }
>   
> @@ -916,13 +923,8 @@ static int dump_intremap_mapping(const s
>       if ( !ivrs_mapping )
>           return 0;
>   
> -    printk("  %04x:%02x:%02x:%u:\n", iommu->seg,
> -           PCI_BUS(ivrs_mapping->dte_requestor_id),
> -           PCI_SLOT(ivrs_mapping->dte_requestor_id),
> -           PCI_FUNC(ivrs_mapping->dte_requestor_id));
> -
>       spin_lock_irqsave(&(ivrs_mapping->intremap_lock), flags);
> -    dump_intremap_table(iommu, ivrs_mapping->intremap_table);
> +    dump_intremap_table(iommu, ivrs_mapping->intremap_table, ivrs_mapping);
>       spin_unlock_irqrestore(&(ivrs_mapping->intremap_lock), flags);
>   
>       process_pending_softirqs();
> @@ -932,17 +934,22 @@ static int dump_intremap_mapping(const s
>   
>   static void dump_intremap_tables(unsigned char key)
>   {
> -    unsigned long flags;
> -
> -    printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
> +    if ( !shared_intremap_table )
> +    {
> +        printk("--- Dumping Per-dev IOMMU Interrupt Remapping Table ---\n");
>   
> -    iterate_ivrs_entries(dump_intremap_mapping);
> +        iterate_ivrs_entries(dump_intremap_mapping);
> +    }
> +    else
> +    {
> +        unsigned long flags;
>   
> -    printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
> +        printk("--- Dumping Shared IOMMU Interrupt Remapping Table ---\n");
>   
> -    spin_lock_irqsave(&shared_intremap_lock, flags);
> -    dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
> -                                         list),
> -                        shared_intremap_table);
> -    spin_unlock_irqrestore(&shared_intremap_lock, flags);
> +        spin_lock_irqsave(&shared_intremap_lock, flags);
> +        dump_intremap_table(list_first_entry(&amd_iommu_head, struct amd_iommu,
> +                                             list),
> +                            shared_intremap_table, NULL);
> +        spin_unlock_irqrestore(&shared_intremap_lock, flags);
> +    }
>   }
> 

-- 
Brian Woods

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 11/12] AMD/IOMMU: don't needlessly log headers when dumping IRTs
Posted by Andrew Cooper 4 years, 8 months ago
On 25/07/2019 14:33, Jan Beulich wrote:
> Log SBDF headers only when there are actual IRTEs to log. This is
> particularly important for the total volume of output when the ACPI
> tables describe far more than just the existing devices. On my Rome
> system so far there was one line for every function of every device on
> all 256 buses of segment 0, with extremely few exceptions (like the
> IOMMUs themselves).
>
> Also only log one of the "per-device" or "shared" overall headers.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v4 12/12] AMD/IOMMU: miscellaneous DTE handling adjustments
Posted by Jan Beulich 4 years, 8 months ago
First and foremost switch boolean fields to bool. Adjust a few related
function parameters as well. Then
- in amd_iommu_set_intremap_table() don't use literal numbers,
- in iommu_dte_add_device_entry() use a compound literal instead of many
   assignments,
- in amd_iommu_setup_domain_device() eliminate a pointless local variable,
- in amd_iommu_disable_domain_device()
   - use || instead of && when deciding whether to clear an entry,
   - clear the I field without any checking of ATS / IOTLB state,
- leave reserved fields unnamed.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -69,8 +69,7 @@ union irte_cptr {
      const union irte128 *ptr128;
  } __transparent__;
  
-#define INTREMAP_LENGTH 0xB
-#define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
+#define INTREMAP_ENTRIES (1 << IOMMU_INTREMAP_LENGTH)
  
  struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
  struct hpet_sbdf hpet_sbdf;
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -101,51 +101,52 @@ static unsigned int set_iommu_pte_presen
  
  void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
                                     uint64_t root_ptr, uint16_t domain_id,
-                                   uint8_t paging_mode, uint8_t valid)
+                                   uint8_t paging_mode, bool valid)
  {
      dte->domain_id = domain_id;
      dte->pt_root = paddr_to_pfn(root_ptr);
-    dte->iw = 1;
-    dte->ir = 1;
+    dte->iw = true;
+    dte->ir = true;
      dte->paging_mode = paging_mode;
-    dte->tv = 1;
+    dte->tv = true;
      dte->v = valid;
  }
  
  void __init amd_iommu_set_intremap_table(
-    struct amd_iommu_dte *dte, uint64_t intremap_ptr, uint8_t int_valid)
+    struct amd_iommu_dte *dte, uint64_t intremap_ptr, bool valid)
  {
      dte->it_root = intremap_ptr >> 6;
-    dte->int_tab_len = 0xb; /* 2048 entries */
-    dte->int_ctl = 2; /* fixed and arbitrated interrupts remapped */
-    dte->ig = 0; /* unmapped interrupt results io page faults */
-    dte->iv = int_valid;
+    dte->int_tab_len = IOMMU_INTREMAP_LENGTH;
+    dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
+    dte->ig = false; /* unmapped interrupts result in i/o page faults */
+    dte->iv = valid;
  }
  
  void __init iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
-                                       struct ivrs_mappings *ivrs_dev)
+                                       const struct ivrs_mappings *ivrs_dev)
  {
      uint8_t flags = ivrs_dev->device_flags;
  
-    memset(dte, 0, sizeof(*dte));
-
-    dte->init_pass = MASK_EXTR(flags, ACPI_IVHD_INIT_PASS);
-    dte->ext_int_pass = MASK_EXTR(flags, ACPI_IVHD_EINT_PASS);
-    dte->nmi_pass = MASK_EXTR(flags, ACPI_IVHD_NMI_PASS);
-    dte->lint0_pass = MASK_EXTR(flags, ACPI_IVHD_LINT0_PASS);
-    dte->lint1_pass = MASK_EXTR(flags, ACPI_IVHD_LINT1_PASS);
-    dte->sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT);
-    dte->ex = ivrs_dev->dte_allow_exclusion;
+    *dte = (struct amd_iommu_dte){
+        .init_pass = flags & ACPI_IVHD_INIT_PASS,
+        .ext_int_pass = flags & ACPI_IVHD_EINT_PASS,
+        .nmi_pass = flags & ACPI_IVHD_NMI_PASS,
+        .lint0_pass = flags & ACPI_IVHD_LINT0_PASS,
+        .lint1_pass = flags & ACPI_IVHD_LINT1_PASS,
+        .ioctl = IOMMU_DEV_TABLE_IO_CONTROL_ABORTED,
+        .sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT),
+        .ex = ivrs_dev->dte_allow_exclusion,
+    };
  }
  
  void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
-                             uint64_t gcr3_mfn, uint8_t gv, uint8_t glx)
+                             uint64_t gcr3_mfn, bool gv, uint8_t glx)
  {
  #define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
  #define GCR3_SHIFT(lo) ((lo) - PAGE_SHIFT)
  
      /* I bit must be set when gcr3 is enabled */
-    dte->i = 1;
+    dte->i = true;
  
      dte->gcr3_trp_14_12 = (gcr3_mfn & GCR3_MASK(14, 12)) >> GCR3_SHIFT(12);
      dte->gcr3_trp_30_15 = (gcr3_mfn & GCR3_MASK(30, 15)) >> GCR3_SHIFT(15);
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -93,7 +93,6 @@ static void amd_iommu_setup_domain_devic
      struct amd_iommu_dte *table, *dte;
      unsigned long flags;
      int req_id, valid = 1;
-    int dte_i = 0;
      u8 bus = pdev->bus;
      const struct domain_iommu *hd = dom_iommu(domain);
  
@@ -103,9 +102,6 @@ static void amd_iommu_setup_domain_devic
      if ( iommu_hwdom_passthrough && is_hardware_domain(domain) )
          valid = 0;
  
-    if ( ats_enabled )
-        dte_i = 1;
-
      /* get device-table entry */
      req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
      table = iommu->dev_table.buffer;
@@ -122,7 +118,7 @@ static void amd_iommu_setup_domain_devic
  
          if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
               iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
-            dte->i = dte_i;
+            dte->i = ats_enabled;
  
          amd_iommu_flush_device(iommu, req_id);
  
@@ -288,14 +284,11 @@ void amd_iommu_disable_domain_device(str
      dte = &table[req_id];
  
      spin_lock_irqsave(&iommu->lock, flags);
-    if ( dte->tv && dte->v )
+    if ( dte->tv || dte->v )
      {
-        dte->tv = 0;
-        dte->v = 0;
-
-        if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
-             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
-            dte->i = 0;
+        dte->tv = false;
+        dte->v = false;
+        dte->i = false;
  
          amd_iommu_flush_device(iommu, req_id);
  
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -107,57 +107,60 @@
  #define IOMMU_DEV_TABLE_INT_CONTROL_FORWARDED	0x1
  #define IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED	0x2
  
+/* For now we always allocate maximum possible interrupt remapping tables. */
+#define IOMMU_INTREMAP_LENGTH			0xB
+
  struct amd_iommu_dte {
      /* 0 - 63 */
-    uint64_t v:1;
-    uint64_t tv:1;
-    uint64_t reserved0:5;
-    uint64_t had:2;
-    uint64_t paging_mode:3;
+    bool v:1;
+    bool tv:1;
+    unsigned int :5;
+    unsigned int had:2;
+    unsigned int paging_mode:3;
      uint64_t pt_root:40;
-    uint64_t ppr:1;
-    uint64_t gprp:1;
-    uint64_t giov:1;
-    uint64_t gv:1;
-    uint64_t glx:2;
-    uint64_t gcr3_trp_14_12:3;
-    uint64_t ir:1;
-    uint64_t iw:1;
-    uint64_t reserved1:1;
+    bool ppr:1;
+    bool gprp:1;
+    bool giov:1;
+    bool gv:1;
+    unsigned int glx:2;
+    unsigned int gcr3_trp_14_12:3;
+    bool ir:1;
+    bool iw:1;
+    unsigned int :1;
  
      /* 64 - 127 */
-    uint64_t domain_id:16;
-    uint64_t gcr3_trp_30_15:16;
-    uint64_t i:1;
-    uint64_t se:1;
-    uint64_t sa:1;
-    uint64_t ioctl:2;
-    uint64_t cache:1;
-    uint64_t sd:1;
-    uint64_t ex:1;
-    uint64_t sys_mgt:2;
-    uint64_t reserved2:1;
-    uint64_t gcr3_trp_51_31:21;
+    unsigned int domain_id:16;
+    unsigned int gcr3_trp_30_15:16;
+    bool i:1;
+    bool se:1;
+    bool sa:1;
+    unsigned int ioctl:2;
+    bool cache:1;
+    bool sd:1;
+    bool ex:1;
+    unsigned int sys_mgt:2;
+    unsigned int :1;
+    unsigned int gcr3_trp_51_31:21;
  
      /* 128 - 191 */
-    uint64_t iv:1;
-    uint64_t int_tab_len:4;
-    uint64_t ig:1;
+    bool iv:1;
+    unsigned int int_tab_len:4;
+    bool ig:1;
      uint64_t it_root:46;
-    uint64_t reserved3:4;
-    uint64_t init_pass:1;
-    uint64_t ext_int_pass:1;
-    uint64_t nmi_pass:1;
-    uint64_t reserved4:1;
-    uint64_t int_ctl:2;
-    uint64_t lint0_pass:1;
-    uint64_t lint1_pass:1;
+    unsigned int :4;
+    bool init_pass:1;
+    bool ext_int_pass:1;
+    bool nmi_pass:1;
+    unsigned int :1;
+    unsigned int int_ctl:2;
+    bool lint0_pass:1;
+    bool lint1_pass:1;
  
      /* 192 - 255 */
-    uint64_t reserved5:54;
-    uint64_t attr_v:1;
-    uint64_t mode0_fc:1;
-    uint64_t snoop_attr:8;
+    uint64_t :54;
+    bool attr_v:1;
+    bool mode0_fc:1;
+    unsigned int snoop_attr:8;
  };
  
  /* Command Buffer */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -73,14 +73,14 @@ int __must_check amd_iommu_flush_iotlb_a
  int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
  void amd_iommu_set_intremap_table(struct amd_iommu_dte *dte,
                                    uint64_t intremap_ptr,
-                                  uint8_t int_valid);
+                                  bool valid);
  void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
  				   uint64_t root_ptr, uint16_t domain_id,
-				   uint8_t paging_mode, uint8_t valid);
+				   uint8_t paging_mode, bool valid);
  void iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
-                                struct ivrs_mappings *ivrs_dev);
+                                const struct ivrs_mappings *ivrs_dev);
  void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
-                             uint64_t gcr3_mfn, uint8_t gv, uint8_t glx);
+                             uint64_t gcr3_mfn, bool gv, uint8_t glx);
  
  /* send cmd to iommu */
  void amd_iommu_flush_all_pages(struct domain *d);

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 12/12] AMD/IOMMU: miscellaneous DTE handling adjustments
Posted by Andrew Cooper 4 years, 8 months ago
On 25/07/2019 14:33, Jan Beulich wrote:
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> @@ -107,57 +107,60 @@
>   #define IOMMU_DEV_TABLE_INT_CONTROL_FORWARDED	0x1
>   #define IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED	0x2
>   
> +/* For now we always allocate maximum possible interrupt remapping tables. */

/* For now, we always allocate the maximum.  2048 remap entries. */

?

> +#define IOMMU_INTREMAP_LENGTH			0xB

Also, LENGTH isn't an appropriate name.  This is actually the order of
the number of entries.  As you're already changing the name, how about
s/LENGTH/ORDER/ here?  If so, Acked-by: Andrew Cooper
<andrew.cooper3@citrix.com>

[Not related to this patch...]

It has always occurred to me that we allocate silly quantities of memory
for interrupt remapping tables.  If I've done my sums right, for Intel
we allocate 64k entries per IOMMU (256k RAM), whereas for AMD we
allocate 2048 entries per PCI function (32k RAM, now with the larger
format).

The largest Intel system I've encountered (interrupt wise) is a few
thousand interrupts, split fairly evenly across the root-complex IOMMUs
(the PCH IOMMU not, because it's mostly legacy IO behind there).

For individual functions, I have never encountered a PCI function with
more than a dozen interrupts or so, so I think in practice we can get
away with allocating a 4k (32 entry) interrupt remap table in all cases.

It would probably make sense to default to allocating less space, and
providing a command line option to allocate max.  Alternatively, we
could work this out as we walk the PCI topology, as it is encoded in
standards compliant ways in config space.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 12/12] AMD/IOMMU: miscellaneous DTE handling adjustments
Posted by Jan Beulich 4 years, 8 months ago
On 30.07.2019 15:42, Andrew Cooper wrote:
> On 25/07/2019 14:33, Jan Beulich wrote:
>> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
>> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
>> @@ -107,57 +107,60 @@
>>    #define IOMMU_DEV_TABLE_INT_CONTROL_FORWARDED	0x1
>>    #define IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED	0x2
>>    
>> +/* For now we always allocate maximum possible interrupt remapping tables. */
> 
> /* For now, we always allocate the maximum.  2048 remap entries. */
> 
> ?

Sure, done.

>> +#define IOMMU_INTREMAP_LENGTH			0xB
> 
> Also, LENGTH isn't an appropriate name.  This is actually the order of
> the number of entries.  As you're already changing the name, how about
> s/LENGTH/ORDER/ here?

I did consider this (and will change), but I didn't change it right
away because of the resulting inconsistency on this line

     dte->int_tab_len = IOMMU_INTREMAP_ORDER;

I had taken "length" to mean "encoded length" here, not "actual length".

> If so, Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks.

> [Not related to this patch...]
> 
> It has always occurred to me that we allocate silly quantities of memory
> for interrupt remapping tables.  If I've done my sums right, for Intel
> we allocate 64k entries per IOMMU (256k RAM), whereas for AMD we
> allocate 2048 entries per PCI function (32k RAM, now with the larger
> format).

Right, that's another thing I wanted to look into as a follow-on. I
too did notice this. Depending on what you mean by "PCI function" it
may actually be worse than what you describe: It's not per PCI
function of present devices, but per PCI function enumerated by the
ACPI tables. On my box this means everything from 00:00.0 to
ff:1f.7, which amounts to almost 2GiB if I'm not mistaken ("almost"
because of some aliasing of devices, where only one table gets
allocated for all the aliases).

> The largest Intel system I've encountered (interrupt wise) is a few
> thousand interrupts, split fairly evenly across the root-complex IOMMUs
> (the PCH IOMMU not, because it's mostly legacy IO behind there).
> 
> For individual functions, I have never encountered a PCI function with
> more than a dozen interrupts or so, so I think in practice we can get
> away with allocating a 4k (32 entry) interrupt remap table in all cases.

That's clearly a possibility. (I think you meant 256 entries per 4k
though.)

> It would probably make sense to default to allocating less space, and
> providing a command line option to allocate max.  Alternatively, we
> could work this out as we walk the PCI topology, as it is encoded in
> standards compliant ways in config space.

To be honest, first of all I'd like to avoid allocating tables for
devices which don't even exist.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 12/12] AMD/IOMMU: miscellaneous DTE handling adjustments
Posted by Woods, Brian 4 years, 7 months ago
On Thu, Jul 25, 2019 at 01:33:50PM +0000, Jan Beulich wrote:
> First and foremost switch boolean fields to bool. Adjust a few related
> function parameters as well. Then
> - in amd_iommu_set_intremap_table() don't use literal numbers,
> - in iommu_dte_add_device_entry() use a compound literal instead of many
>    assignments,
> - in amd_iommu_setup_domain_device()
>    - eliminate a pointless local variable,
>    - use || instead of && when deciding whether to clear an entry,
>    - clear the I field without any checking of ATS / IOTLB state,
> - leave reserved fields unnamed.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Brian Woods <brian.woods@amd.com>

> ---
> v4: New.
> 
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -69,8 +69,7 @@ union irte_cptr {
>       const union irte128 *ptr128;
>   } __transparent__;
>   
> -#define INTREMAP_LENGTH 0xB
> -#define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
> +#define INTREMAP_ENTRIES (1 << IOMMU_INTREMAP_LENGTH)
>   
>   struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
>   struct hpet_sbdf hpet_sbdf;
> --- a/xen/drivers/passthrough/amd/iommu_map.c
> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> @@ -101,51 +101,52 @@ static unsigned int set_iommu_pte_presen
>   
>   void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
>                                      uint64_t root_ptr, uint16_t domain_id,
> -                                   uint8_t paging_mode, uint8_t valid)
> +                                   uint8_t paging_mode, bool valid)
>   {
>       dte->domain_id = domain_id;
>       dte->pt_root = paddr_to_pfn(root_ptr);
> -    dte->iw = 1;
> -    dte->ir = 1;
> +    dte->iw = true;
> +    dte->ir = true;
>       dte->paging_mode = paging_mode;
> -    dte->tv = 1;
> +    dte->tv = true;
>       dte->v = valid;
>   }
>   
>   void __init amd_iommu_set_intremap_table(
> -    struct amd_iommu_dte *dte, uint64_t intremap_ptr, uint8_t int_valid)
> +    struct amd_iommu_dte *dte, uint64_t intremap_ptr, bool valid)
>   {
>       dte->it_root = intremap_ptr >> 6;
> -    dte->int_tab_len = 0xb; /* 2048 entries */
> -    dte->int_ctl = 2; /* fixed and arbitrated interrupts remapped */
> -    dte->ig = 0; /* unmapped interrupt results io page faults */
> -    dte->iv = int_valid;
> +    dte->int_tab_len = IOMMU_INTREMAP_LENGTH;
> +    dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
> +    dte->ig = false; /* unmapped interrupts result in i/o page faults */
> +    dte->iv = valid;
>   }
>   
>   void __init iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
> -                                       struct ivrs_mappings *ivrs_dev)
> +                                       const struct ivrs_mappings *ivrs_dev)
>   {
>       uint8_t flags = ivrs_dev->device_flags;
>   
> -    memset(dte, 0, sizeof(*dte));
> -
> -    dte->init_pass = MASK_EXTR(flags, ACPI_IVHD_INIT_PASS);
> -    dte->ext_int_pass = MASK_EXTR(flags, ACPI_IVHD_EINT_PASS);
> -    dte->nmi_pass = MASK_EXTR(flags, ACPI_IVHD_NMI_PASS);
> -    dte->lint0_pass = MASK_EXTR(flags, ACPI_IVHD_LINT0_PASS);
> -    dte->lint1_pass = MASK_EXTR(flags, ACPI_IVHD_LINT1_PASS);
> -    dte->sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT);
> -    dte->ex = ivrs_dev->dte_allow_exclusion;
> +    *dte = (struct amd_iommu_dte){
> +        .init_pass = flags & ACPI_IVHD_INIT_PASS,
> +        .ext_int_pass = flags & ACPI_IVHD_EINT_PASS,
> +        .nmi_pass = flags & ACPI_IVHD_NMI_PASS,
> +        .lint0_pass = flags & ACPI_IVHD_LINT0_PASS,
> +        .lint1_pass = flags & ACPI_IVHD_LINT1_PASS,
> +        .ioctl = IOMMU_DEV_TABLE_IO_CONTROL_ABORTED,
> +        .sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT),
> +        .ex = ivrs_dev->dte_allow_exclusion,
> +    };
>   }
>   
>   void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
> -                             uint64_t gcr3_mfn, uint8_t gv, uint8_t glx)
> +                             uint64_t gcr3_mfn, bool gv, uint8_t glx)
>   {
>   #define GCR3_MASK(hi, lo) (((1ul << ((hi) + 1)) - 1) & ~((1ul << (lo)) - 1))
>   #define GCR3_SHIFT(lo) ((lo) - PAGE_SHIFT)
>   
>       /* I bit must be set when gcr3 is enabled */
> -    dte->i = 1;
> +    dte->i = true;
>   
>       dte->gcr3_trp_14_12 = (gcr3_mfn & GCR3_MASK(14, 12)) >> GCR3_SHIFT(12);
>       dte->gcr3_trp_30_15 = (gcr3_mfn & GCR3_MASK(30, 15)) >> GCR3_SHIFT(15);
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -93,7 +93,6 @@ static void amd_iommu_setup_domain_devic
>       struct amd_iommu_dte *table, *dte;
>       unsigned long flags;
>       int req_id, valid = 1;
> -    int dte_i = 0;
>       u8 bus = pdev->bus;
>       const struct domain_iommu *hd = dom_iommu(domain);
>   
> @@ -103,9 +102,6 @@ static void amd_iommu_setup_domain_devic
>       if ( iommu_hwdom_passthrough && is_hardware_domain(domain) )
>           valid = 0;
>   
> -    if ( ats_enabled )
> -        dte_i = 1;
> -
>       /* get device-table entry */
>       req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
>       table = iommu->dev_table.buffer;
> @@ -122,7 +118,7 @@ static void amd_iommu_setup_domain_devic
>   
>           if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>                iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> -            dte->i = dte_i;
> +            dte->i = ats_enabled;
>   
>           amd_iommu_flush_device(iommu, req_id);
>   
> @@ -288,14 +284,11 @@ void amd_iommu_disable_domain_device(str
>       dte = &table[req_id];
>   
>       spin_lock_irqsave(&iommu->lock, flags);
> -    if ( dte->tv && dte->v )
> +    if ( dte->tv || dte->v )
>       {
> -        dte->tv = 0;
> -        dte->v = 0;
> -
> -        if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> -             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> -            dte->i = 0;
> +        dte->tv = false;
> +        dte->v = false;
> +        dte->i = false;
>   
>           amd_iommu_flush_device(iommu, req_id);
>   
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> @@ -107,57 +107,60 @@
>   #define IOMMU_DEV_TABLE_INT_CONTROL_FORWARDED	0x1
>   #define IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED	0x2
>   
> +/* For now we always allocate maximum possible interrupt remapping tables. */
> +#define IOMMU_INTREMAP_LENGTH			0xB
> +
>   struct amd_iommu_dte {
>       /* 0 - 63 */
> -    uint64_t v:1;
> -    uint64_t tv:1;
> -    uint64_t reserved0:5;
> -    uint64_t had:2;
> -    uint64_t paging_mode:3;
> +    bool v:1;
> +    bool tv:1;
> +    unsigned int :5;
> +    unsigned int had:2;
> +    unsigned int paging_mode:3;
>       uint64_t pt_root:40;
> -    uint64_t ppr:1;
> -    uint64_t gprp:1;
> -    uint64_t giov:1;
> -    uint64_t gv:1;
> -    uint64_t glx:2;
> -    uint64_t gcr3_trp_14_12:3;
> -    uint64_t ir:1;
> -    uint64_t iw:1;
> -    uint64_t reserved1:1;
> +    bool ppr:1;
> +    bool gprp:1;
> +    bool giov:1;
> +    bool gv:1;
> +    unsigned int glx:2;
> +    unsigned int gcr3_trp_14_12:3;
> +    bool ir:1;
> +    bool iw:1;
> +    unsigned int :1;
>   
>       /* 64 - 127 */
> -    uint64_t domain_id:16;
> -    uint64_t gcr3_trp_30_15:16;
> -    uint64_t i:1;
> -    uint64_t se:1;
> -    uint64_t sa:1;
> -    uint64_t ioctl:2;
> -    uint64_t cache:1;
> -    uint64_t sd:1;
> -    uint64_t ex:1;
> -    uint64_t sys_mgt:2;
> -    uint64_t reserved2:1;
> -    uint64_t gcr3_trp_51_31:21;
> +    unsigned int domain_id:16;
> +    unsigned int gcr3_trp_30_15:16;
> +    bool i:1;
> +    bool se:1;
> +    bool sa:1;
> +    unsigned int ioctl:2;
> +    bool cache:1;
> +    bool sd:1;
> +    bool ex:1;
> +    unsigned int sys_mgt:2;
> +    unsigned int :1;
> +    unsigned int gcr3_trp_51_31:21;
>   
>       /* 128 - 191 */
> -    uint64_t iv:1;
> -    uint64_t int_tab_len:4;
> -    uint64_t ig:1;
> +    bool iv:1;
> +    unsigned int int_tab_len:4;
> +    bool ig:1;
>       uint64_t it_root:46;
> -    uint64_t reserved3:4;
> -    uint64_t init_pass:1;
> -    uint64_t ext_int_pass:1;
> -    uint64_t nmi_pass:1;
> -    uint64_t reserved4:1;
> -    uint64_t int_ctl:2;
> -    uint64_t lint0_pass:1;
> -    uint64_t lint1_pass:1;
> +    unsigned int :4;
> +    bool init_pass:1;
> +    bool ext_int_pass:1;
> +    bool nmi_pass:1;
> +    unsigned int :1;
> +    unsigned int int_ctl:2;
> +    bool lint0_pass:1;
> +    bool lint1_pass:1;
>   
>       /* 192 - 255 */
> -    uint64_t reserved5:54;
> -    uint64_t attr_v:1;
> -    uint64_t mode0_fc:1;
> -    uint64_t snoop_attr:8;
> +    uint64_t :54;
> +    bool attr_v:1;
> +    bool mode0_fc:1;
> +    unsigned int snoop_attr:8;
>   };
>   
>   /* Command Buffer */
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> @@ -73,14 +73,14 @@ int __must_check amd_iommu_flush_iotlb_a
>   int get_dma_requestor_id(uint16_t seg, uint16_t bdf);
>   void amd_iommu_set_intremap_table(struct amd_iommu_dte *dte,
>                                     uint64_t intremap_ptr,
> -                                  uint8_t int_valid);
> +                                  bool valid);
>   void amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
>   				   uint64_t root_ptr, uint16_t domain_id,
> -				   uint8_t paging_mode, uint8_t valid);
> +				   uint8_t paging_mode, bool valid);
>   void iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
> -                                struct ivrs_mappings *ivrs_dev);
> +                                const struct ivrs_mappings *ivrs_dev);
>   void iommu_dte_set_guest_cr3(struct amd_iommu_dte *dte, uint16_t dom_id,
> -                             uint64_t gcr3_mfn, uint8_t gv, uint8_t glx);
> +                             uint64_t gcr3_mfn, bool gv, uint8_t glx);
>   
>   /* send cmd to iommu */
>   void amd_iommu_flush_all_pages(struct domain *d);
> 

-- 
Brian Woods

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel