Patchwork [v4,21/28] vvtd: update hvm_gmsi_info when binding guest msi with pirq or

login
register
mail settings
Submitter Chao Gao
Date Nov. 17, 2017, 6:22 a.m.
Message ID <1510899755-40237-22-git-send-email-chao.gao@intel.com>
Download mbox | patch
Permalink /patch/385519/
State New
Headers show

Comments

Chao Gao - Nov. 17, 2017, 6:22 a.m.
... handlding guest's invalidation request.

To support pirq migration optimization and using VT-d posted interrupt to
inject msi from assigned devices, each time guest programs msi information
(affinity, vector), the struct hvm_gmsi_info should be updated accordingly.
But after introducing vvtd, guest only needs to update an IRTE, which is in
guest memory, to program msi information.  vvtd doesn't trap r/w to the memory
range. Instead, it traps the queue invalidation, which is a method used to
notify VT-d hardware that an IRTE has changed.

This patch updates hvm_gmsi_info structure and programs physical IRTEs to use
VT-d posted interrupt if possible when binding guest msi with pirq or handling
guest's invalidation request. For the latter, all physical interrupts bound
with the domain are gone through to find the ones matching with the IRTE.

Notes: calling vvtd_process_iq() in vvtd_read() rather than in
vvtd_handle_irq_request() is to avoid ABBA deadlock of d->event_lock and
vvtd->ie_lock.

Signed-off-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
v4:
 - new
---
 xen/arch/x86/hvm/hvm.c             |  2 +-
 xen/drivers/passthrough/io.c       | 89 ++++++++++++++++++++++++++++----------
 xen/drivers/passthrough/vtd/vvtd.c | 70 ++++++++++++++++++++++++++++--
 xen/include/asm-x86/hvm/hvm.h      |  2 +
 xen/include/asm-x86/hvm/irq.h      |  1 +
 xen/include/asm-x86/viommu.h       | 11 +++++
 6 files changed, 147 insertions(+), 28 deletions(-)

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 964418a..d2c1372 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -462,7 +462,7 @@  void hvm_migrate_timers(struct vcpu *v)
     pt_migrate(v);
 }
 
-static int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
+int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
                             void *arg)
 {
     struct vcpu *v = arg;
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index d8c66bf..9198ef5 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -21,6 +21,7 @@ 
 #include <xen/iommu.h>
 #include <xen/cpu.h>
 #include <xen/irq.h>
+#include <xen/viommu.h>
 #include <asm/hvm/irq.h>
 #include <asm/hvm/support.h>
 #include <asm/io_apic.h>
@@ -275,6 +276,61 @@  static struct vcpu *vector_hashing_dest(const struct domain *d,
     return dest;
 }
 
+void pt_update_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
+{
+    uint8_t dest, delivery_mode;
+    bool dest_mode;
+    int dest_vcpu_id;
+    const struct vcpu *vcpu;
+    struct arch_irq_remapping_request request;
+    struct arch_irq_remapping_info remap_info;
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    /* Calculate dest_vcpu_id for MSI-type pirq migration. */
+    irq_request_msi_fill(&request, pirq_dpci->gmsi.addr, pirq_dpci->gmsi.data);
+    if ( viommu_check_irq_remapping(d, &request) )
+    {
+        /* An error in IRTE, don't perform the optimization */
+        if ( viommu_get_irq_info(d, &request, &remap_info) )
+        {
+            pirq_dpci->gmsi.posted = false;
+            pirq_dpci->gmsi.dest_vcpu_id = -1;
+            pirq_dpci->gmsi.gvec = 0;
+            return;
+        }
+
+        dest = remap_info.dest;
+        dest_mode = remap_info.dest_mode;
+        delivery_mode = remap_info.delivery_mode;
+        pirq_dpci->gmsi.gvec = remap_info.vector;
+    }
+    else
+    {
+        dest = MASK_EXTR(pirq_dpci->gmsi.addr, MSI_ADDR_DEST_ID_MASK);
+        dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
+        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
+                                  MSI_DATA_DELIVERY_MODE_MASK);
+        pirq_dpci->gmsi.gvec = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
+    }
+
+    dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
+    pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
+
+    pirq_dpci->gmsi.posted = false;
+    vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
+    if ( iommu_intpost )
+    {
+        if ( delivery_mode == dest_LowestPrio )
+            vcpu = vector_hashing_dest(d, dest, dest_mode, pirq_dpci->gmsi.gvec);
+        if ( vcpu )
+        {
+            pirq_dpci->gmsi.posted = true;
+            pirq_dpci->gmsi.dest_vcpu_id = vcpu->vcpu_id;
+        }
+    }
+}
+
 int pt_irq_create_bind(
     struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
 {
@@ -339,9 +395,6 @@  int pt_irq_create_bind(
     {
     case PT_IRQ_TYPE_MSI:
     {
-        uint8_t dest, delivery_mode, gvec;
-        bool dest_mode;
-        int dest_vcpu_id;
         const struct vcpu *vcpu;
 
         if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
@@ -411,35 +464,23 @@  int pt_irq_create_bind(
                 pirq_dpci->gmsi.addr = pt_irq_bind->u.msi.addr;
             }
         }
-        /* Calculate dest_vcpu_id for MSI-type pirq migration. */
-        dest = MASK_EXTR(pirq_dpci->gmsi.addr, MSI_ADDR_DEST_ID_MASK);
-        dest_mode = pirq_dpci->gmsi.addr & MSI_ADDR_DESTMODE_MASK;
-        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.data,
-                                  MSI_DATA_DELIVERY_MODE_MASK);
-        gvec = pirq_dpci->gmsi.data & MSI_DATA_VECTOR_MASK;
-        pirq_dpci->gmsi.gvec = gvec;
 
-        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
-        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
+        pt_update_gmsi(d, pirq_dpci);
         spin_unlock(&d->event_lock);
 
-        pirq_dpci->gmsi.posted = false;
-        vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
-        if ( iommu_intpost )
-        {
-            if ( delivery_mode == dest_LowestPrio )
-                vcpu = vector_hashing_dest(d, dest, dest_mode,
-                                           pirq_dpci->gmsi.gvec);
-            if ( vcpu )
-                pirq_dpci->gmsi.posted = true;
-        }
-        if ( dest_vcpu_id >= 0 )
-            hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
+        if ( pirq_dpci->gmsi.dest_vcpu_id >= 0 )
+            hvm_migrate_pirqs(d->vcpu[pirq_dpci->gmsi.dest_vcpu_id]);
 
         /* Use interrupt posting if it is supported. */
         if ( iommu_intpost )
+        {
+            if ( pirq_dpci->gmsi.posted )
+                vcpu = d->vcpu[pirq_dpci->gmsi.dest_vcpu_id];
+            else
+                vcpu = NULL;
             pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
                            info, pirq_dpci->gmsi.gvec);
+        }
 
         if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
         {
diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
index f6bde69..d12ad1d 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -477,6 +477,50 @@  static int vvtd_record_fault(struct vvtd *vvtd,
 }
 
 /*
+ * 'arg' is the index of interrupt remapping table. This index is used to
+ * search physical irqs which satify that the gmsi mapped with the physical irq
+ * is tranlated by the IRTE refered to by the index. The struct hvm_gmsi_info
+ * contains some fields are infered from an virtual IRTE. These fields should
+ * be updated when guest invalidates an IRTE. Furthermore, the physical IRTE
+ * is updated accordingly to reduce IPIs or utilize VT-d posted interrupt.
+ *
+ * if 'arg' is -1, perform a global invalidation.
+ */
+static int invalidate_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
+                         void *arg)
+{
+    if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
+    {
+        uint32_t index, target = (long)arg;
+        struct arch_irq_remapping_request req;
+        const struct vcpu *vcpu;
+
+        irq_request_msi_fill(&req, pirq_dpci->gmsi.addr, pirq_dpci->gmsi.data);
+        if ( !irq_remapping_request_index(&req, &index) &&
+             ((target == -1) || (target == index)) )
+        {
+            pt_update_gmsi(d, pirq_dpci);
+            if ( pirq_dpci->gmsi.dest_vcpu_id >= 0 )
+                hvm_migrate_pirq(d, pirq_dpci,
+                                 d->vcpu[pirq_dpci->gmsi.dest_vcpu_id]);
+
+            /* Use interrupt posting if it is supported. */
+            if ( iommu_intpost )
+            {
+                if ( pirq_dpci->gmsi.posted )
+                    vcpu = d->vcpu[pirq_dpci->gmsi.dest_vcpu_id];
+                else
+                    vcpu = NULL;
+                pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
+                               dpci_pirq(pirq_dpci), pirq_dpci->gmsi.gvec);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
  * Process an invalidation descriptor. Currently, only two types descriptors,
  * Interrupt Entry Cache Invalidation Descritor and Invalidation Wait
  * Descriptor are handled.
@@ -530,7 +574,26 @@  static int process_iqe(struct vvtd *vvtd, uint32_t i)
         break;
 
     case TYPE_INVAL_IEC:
-        /* No cache is preserved in vvtd, nothing is needed to be flushed */
+        /*
+         * If VT-d pi is enabled, pi_update_irte() may be called. It assumes
+         * pcidevs_locked().
+         */
+        pcidevs_lock();
+        spin_lock(&vvtd->domain->event_lock);
+        /* A global invalidation of the cache is requested */
+        if ( !qinval.q.iec_inv_dsc.lo.granu )
+            pt_pirq_iterate(vvtd->domain, invalidate_gmsi, (void *)(long)-1);
+        else
+        {
+            uint32_t iidx = qinval.q.iec_inv_dsc.lo.iidx;
+            uint32_t nr = 1 << qinval.q.iec_inv_dsc.lo.im;
+
+            for ( ; nr; nr--, iidx++)
+                pt_pirq_iterate(vvtd->domain, invalidate_gmsi,
+                                (void *)(long)iidx);
+        }
+        spin_unlock(&vvtd->domain->event_lock);
+        pcidevs_unlock();
         break;
 
     default:
@@ -839,6 +902,8 @@  static int vvtd_read(struct vcpu *v, unsigned long addr,
     else
         *pval = vvtd_get_reg_quad(vvtd, offset);
 
+    if ( !atomic_read(&vvtd->inflight_intr) )
+        vvtd_process_iq(vvtd);
     return X86EMUL_OKAY;
 }
 
@@ -1088,8 +1153,7 @@  static int vvtd_handle_irq_request(const struct domain *d,
                         irte.remap.tm);
 
  out:
-    if ( !atomic_dec_and_test(&vvtd->inflight_intr) )
-        vvtd_process_iq(vvtd);
+    atomic_dec(&vvtd->inflight_intr);
     return ret;
 }
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index b687e03..f276ab6 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -394,6 +394,8 @@  bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val);
 bool hvm_check_cpuid_faulting(struct vcpu *v);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
+int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
+                            void *arg);
 void hvm_migrate_pirqs(struct vcpu *v);
 
 void hvm_inject_event(const struct x86_event *event);
diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
index 3a8832c..3279371 100644
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -176,6 +176,7 @@  struct hvm_pirq_dpci {
 
 void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
 bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *);
+void pt_update_gmsi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci);
 int pt_pirq_iterate(struct domain *d,
                     int (*cb)(struct domain *,
                               struct hvm_pirq_dpci *, void *arg),
diff --git a/xen/include/asm-x86/viommu.h b/xen/include/asm-x86/viommu.h
index e526e38..91ebc64 100644
--- a/xen/include/asm-x86/viommu.h
+++ b/xen/include/asm-x86/viommu.h
@@ -58,6 +58,17 @@  static inline void irq_request_ioapic_fill(
     req->msg.rte = rte;
 }
 
+static inline void irq_request_msi_fill(
+    struct arch_irq_remapping_request *req, uint64_t addr, uint32_t data)
+{
+    ASSERT(req);
+    req->type = VIOMMU_REQUEST_IRQ_MSI;
+    /* Source ID isn't in use */
+    req->source_id = 0;
+    req->msg.msi.addr = addr;
+    req->msg.msi.data = data;
+}
+
 #endif /* __ARCH_X86_VIOMMU_H__ */
 
 /*