Patchwork x86/PV: fix/generalize guest nul selector handling

login
register
mail settings
Submitter Jan Beulich
Date Sept. 20, 2017, 3:08 p.m.
Message ID <59C2A0A4020000780017D82A@prv-mh.provo.novell.com>
Download mbox | patch
Permalink /patch/342857/
State New
Headers show

Comments

Jan Beulich - Sept. 20, 2017, 3:08 p.m.
FS/GS base (and limit) aren't being cleared by the loading of a nul
selector into the segment register on AMD CPUs. Therefore, if an
outgoing vCPU has a non-null base in one of these registers and the
subsequent incoming vCPU has a non-zero but nul selector in the
respective register(s), the selector value(s) would be loaded without
clearing the segment base(s) in the hidden register portion.

Since the ABI states "zero" in its description of the fields, it is
worth noting that the chosen approach to fix this alters the written
down ABI. I consider this preferrable over enforcing the previously
written down behavior, as nul selectors are far more likely to be what
was meant from the beginning.

The adjustments also eliminate an inconsistency between FS and GS
handling: Old code had an extra pointless (gs_base_user was always zero
when DIRTY_GS was set) conditional for GS. The old bitkeeper changeset
has no explanation for this asymmetry.

Inspired by Linux commit e137a4d8f4dd2e277e355495b6b2cb241a8693c3.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Patch

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1260,30 +1260,28 @@  static void load_segments(struct vcpu *n
     if ( unlikely((dirty_segment_mask & DIRTY_ES) | uregs->es) )
         all_segs_okay &= loadsegment(es, uregs->es);
 
-    /*
-     * Either selector != 0 ==> reload.
-     * Also reload to reset FS_BASE if it was non-zero.
-     */
-    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
-                  uregs->fs) )
+    /* Either selector != 0 ==> reload. */
+    if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) )
+    {
         all_segs_okay &= loadsegment(fs, uregs->fs);
+        /* non-nul selector updates fs_base */
+        if ( uregs->fs & ~3 )
+            dirty_segment_mask &= ~DIRTY_FS_BASE;
+    }
 
-    /*
-     * Either selector != 0 ==> reload.
-     * Also reload to reset GS_BASE if it was non-zero.
-     */
-    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
-                  uregs->gs) )
-    {
-        /* Reset GS_BASE with user %gs? */
-        if ( (dirty_segment_mask & DIRTY_GS) || !n->arch.pv_vcpu.gs_base_user )
-            all_segs_okay &= loadsegment(gs, uregs->gs);
+    /* Either selector != 0 ==> reload. */
+    if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) )
+    {
+        all_segs_okay &= loadsegment(gs, uregs->gs);
+        /* non-nul selector updates gs_base_user */
+        if ( uregs->gs & ~3 )
+            dirty_segment_mask &= ~DIRTY_GS_BASE_USER;
     }
 
     if ( !is_pv_32bit_vcpu(n) )
     {
         /* This can only be non-zero if selector is NULL. */
-        if ( n->arch.pv_vcpu.fs_base )
+        if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) )
             wrfsbase(n->arch.pv_vcpu.fs_base);
 
         /* Most kernels have non-zero GS base, so don't bother testing. */
@@ -1291,7 +1289,8 @@  static void load_segments(struct vcpu *n
         wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel);
 
         /* This can only be non-zero if selector is NULL. */
-        if ( n->arch.pv_vcpu.gs_base_user )
+        if ( n->arch.pv_vcpu.gs_base_user |
+             (dirty_segment_mask & DIRTY_GS_BASE_USER) )
             wrgsbase(n->arch.pv_vcpu.gs_base_user);
 
         /* If in kernel mode then switch the GS bases around. */
@@ -1426,22 +1425,22 @@  static void save_segments(struct vcpu *v
     if ( regs->fs || is_pv_32bit_vcpu(v) )
     {
         dirty_segment_mask |= DIRTY_FS;
-        v->arch.pv_vcpu.fs_base = 0; /* != 0 selector kills fs_base */
+        /* non-nul selector kills fs_base */
+        if ( regs->fs & ~3 )
+            v->arch.pv_vcpu.fs_base = 0;
     }
-    else if ( v->arch.pv_vcpu.fs_base )
-    {
+    if ( v->arch.pv_vcpu.fs_base )
         dirty_segment_mask |= DIRTY_FS_BASE;
-    }
 
     if ( regs->gs || is_pv_32bit_vcpu(v) )
     {
         dirty_segment_mask |= DIRTY_GS;
-        v->arch.pv_vcpu.gs_base_user = 0; /* != 0 selector kills gs_base_user */
+        /* non-nul selector kills gs_base_user */
+        if ( regs->gs & ~3 )
+            v->arch.pv_vcpu.gs_base_user = 0;
     }
-    else if ( v->arch.pv_vcpu.gs_base_user )
-    {
+    if ( v->arch.pv_vcpu.gs_base_user )
         dirty_segment_mask |= DIRTY_GS_BASE_USER;
-    }
 
     this_cpu(dirty_segment_mask) = dirty_segment_mask;
 }
--- a/xen/include/public/arch-x86/xen-x86_64.h
+++ b/xen/include/public/arch-x86/xen-x86_64.h
@@ -203,8 +203,8 @@  struct cpu_user_regs {
     uint16_t ss, _pad2[3];
     uint16_t es, _pad3[3];
     uint16_t ds, _pad4[3];
-    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
-    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+    uint16_t fs, _pad5[3]; /* Non-nul => takes precedence over fs_base.      */
+    uint16_t gs, _pad6[3]; /* Non-nul => takes precedence over gs_base_user. */
 };
 typedef struct cpu_user_regs cpu_user_regs_t;
 DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);