Patchwork [6/11] KVM/MMU: Flush tlb with range list in sync_page()

Submitter Lan Tianyu
Date Jan. 4, 2019, 8:54 a.m.
Message ID <20190104085405.40356-7-Tianyu.Lan@microsoft.com>
Permalink /patch/692719/
State New

Comments

Lan Tianyu - Jan. 4, 2019, 8:54 a.m.
From: Lan Tianyu <Tianyu.Lan@microsoft.com>

This patch flushes the TLB via the flush list function.

Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
---
 arch/x86/kvm/paging_tmpl.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
Christopherson, Sean J - Jan. 4, 2019, 4:30 p.m.
On Fri, Jan 04, 2019 at 04:54:00PM +0800, lantianyu1986@gmail.com wrote:
> From: Lan Tianyu <Tianyu.Lan@microsoft.com>
> 
> This patch flushes the TLB via the flush list function.

More explanation of why this is beneficial would be nice.  Without the
context of the overall series it's not immediately obvious what
kvm_flush_remote_tlbs_with_list() does without a bit of digging.
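For readers without the rest of the series, a rough sketch of what such a
flush-with-list helper might do follows.  This is an illustration only: the
real kvm_flush_remote_tlbs_with_list() is defined in an earlier patch of the
series, and the kvm_tlb_range.flush_list field and the
tlb_remote_flush_with_range hook used below are assumptions, not its
confirmed implementation.

	/*
	 * Hypothetical sketch: flush only the guest physical ranges covered
	 * by the shadow pages on @flush_list, falling back to a full remote
	 * TLB flush when ranged flushing is not available.
	 */
	static void example_flush_remote_tlbs_with_list(struct kvm *kvm,
							struct list_head *flush_list)
	{
		struct kvm_tlb_range range;

		if (!kvm_available_flush_tlb_with_range()) {
			/* No ranged flush support: do a full flush instead. */
			kvm_flush_remote_tlbs(kvm);
			return;
		}

		/*
		 * Hand the list to the backend (e.g. Hyper-V's guest physical
		 * address list flush) so only the ranges mapped by the listed
		 * shadow pages are flushed.
		 */
		range.flush_list = flush_list;
		kvm_x86_ops->tlb_remote_flush_with_range(kvm, &range);
	}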

> 
> Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
> ---
>  arch/x86/kvm/paging_tmpl.h | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 833e8855bbc9..866ccdea762e 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -973,6 +973,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>  	bool host_writable;
>  	gpa_t first_pte_gpa;
>  	int set_spte_ret = 0;
> +	LIST_HEAD(flush_list);
>  
>  	/* direct kvm_mmu_page can not be unsync. */
>  	BUG_ON(sp->role.direct);
> @@ -980,6 +981,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>  	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
>  
>  	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
> +		int tmp_spte_ret = 0;
>  		unsigned pte_access;
>  		pt_element_t gpte;
>  		gpa_t pte_gpa;
> @@ -1029,14 +1031,24 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>  
>  		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
>  
> -		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
> +		tmp_spte_ret = set_spte(vcpu, &sp->spt[i],
>  					 pte_access, PT_PAGE_TABLE_LEVEL,
>  					 gfn, spte_to_pfn(sp->spt[i]),
>  					 true, false, host_writable);
> +
> +		if (kvm_available_flush_tlb_with_range()
> +		    && (tmp_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)) {
> +			struct kvm_mmu_page *leaf_sp = page_header(sp->spt[i]
> +					& PT64_BASE_ADDR_MASK);
> +			list_add(&leaf_sp->flush_link, &flush_list);
> +		}
> +
> +		set_spte_ret |= tmp_spte_ret;
> +
>  	}
>  
>  	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
> -		kvm_flush_remote_tlbs(vcpu->kvm);
> +		kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);

This is a bit confusing and potentially fragile.  It's not obvious that
kvm_flush_remote_tlbs_with_list() is guaranteed to call
kvm_flush_remote_tlbs() when kvm_available_flush_tlb_with_range() is
false, and you're relying on the kvm_flush_remote_tlbs_with_list() call
chain to never optimize away the empty list case.  Rechecking
kvm_available_flush_tlb_with_range() isn't expensive.
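A minimal sketch of that suggestion, using the names from the patch above:

	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) {
		/* Make the fallback explicit at the call site instead of
		 * relying on the helper to do a full flush. */
		if (kvm_available_flush_tlb_with_range())
			kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);
		else
			kvm_flush_remote_tlbs(vcpu->kvm);
	}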

>  
>  	return nr_present;
>  }
> -- 
> 2.14.4
>
Lan Tianyu - Jan. 7, 2019, 5:13 a.m.
On Sat, Jan 5, 2019 at 12:30 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Jan 04, 2019 at 04:54:00PM +0800, lantianyu1986@gmail.com wrote:
> > From: Lan Tianyu <Tianyu.Lan@microsoft.com>
> >
> > This patch flushes the TLB via the flush list function.
>
> More explanation of why this is beneficial would be nice.  Without the
> context of the overall series it's not immediately obvious what
> kvm_flush_remote_tlbs_with_list() does without a bit of digging.
>
> >
> > Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
> > ---
> >  arch/x86/kvm/paging_tmpl.h | 16 ++++++++++++++--
> >  1 file changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> > index 833e8855bbc9..866ccdea762e 100644
> > --- a/arch/x86/kvm/paging_tmpl.h
> > +++ b/arch/x86/kvm/paging_tmpl.h
> > @@ -973,6 +973,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> >       bool host_writable;
> >       gpa_t first_pte_gpa;
> >       int set_spte_ret = 0;
> > +     LIST_HEAD(flush_list);
> >
> >       /* direct kvm_mmu_page can not be unsync. */
> >       BUG_ON(sp->role.direct);
> > @@ -980,6 +981,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> >       first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
> >
> >       for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
> > +             int tmp_spte_ret = 0;
> >               unsigned pte_access;
> >               pt_element_t gpte;
> >               gpa_t pte_gpa;
> > @@ -1029,14 +1031,24 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> >
> >               host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
> >
> > -             set_spte_ret |= set_spte(vcpu, &sp->spt[i],
> > +             tmp_spte_ret = set_spte(vcpu, &sp->spt[i],
> >                                        pte_access, PT_PAGE_TABLE_LEVEL,
> >                                        gfn, spte_to_pfn(sp->spt[i]),
> >                                        true, false, host_writable);
> > +
> > +             if (kvm_available_flush_tlb_with_range()
> > +                 && (tmp_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)) {
> > +                     struct kvm_mmu_page *leaf_sp = page_header(sp->spt[i]
> > +                                     & PT64_BASE_ADDR_MASK);
> > +                     list_add(&leaf_sp->flush_link, &flush_list);
> > +             }
> > +
> > +             set_spte_ret |= tmp_spte_ret;
> > +
> >       }
> >
> >       if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
> > -             kvm_flush_remote_tlbs(vcpu->kvm);
> > +             kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);
>
> This is a bit confusing and potentially fragile.  It's not obvious that
> kvm_flush_remote_tlbs_with_list() is guaranteed to call
> kvm_flush_remote_tlbs() when kvm_available_flush_tlb_with_range() is
> false, and you're relying on the kvm_flush_remote_tlbs_with_list() call
> chain to never optimize away the empty list case.  Rechecking
> kvm_available_flush_tlb_with_range() isn't expensive.

That makes sense. Will update. Thanks.

>
> >
> >       return nr_present;
> >  }
> > --
> > 2.14.4
> >
Paolo Bonzini - Jan. 7, 2019, 4:07 p.m.
On 04/01/19 17:30, Sean Christopherson wrote:
>> +
>> +		if (kvm_available_flush_tlb_with_range()
>> +		    && (tmp_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)) {
>> +			struct kvm_mmu_page *leaf_sp = page_header(sp->spt[i]
>> +					& PT64_BASE_ADDR_MASK);
>> +			list_add(&leaf_sp->flush_link, &flush_list);
>> +		}
>> +
>> +		set_spte_ret |= tmp_spte_ret;
>> +
>>  	}
>>  
>>  	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
>> -		kvm_flush_remote_tlbs(vcpu->kvm);
>> +		kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);
> This is a bit confusing and potentially fragile.  It's not obvious that
> kvm_flush_remote_tlbs_with_list() is guaranteed to call
> kvm_flush_remote_tlbs() when kvm_available_flush_tlb_with_range() is
> false, and you're relying on the kvm_flush_remote_tlbs_with_list() call
> chain to never optimize away the empty list case.  Rechecking
> kvm_available_flush_tlb_with_range() isn't expensive.
> 

Alternatively, do not check it during the loop: always build the
flush_list, and always call kvm_flush_remote_tlbs_with_list.  The
function can then check whether the list is empty, and the OR of
tmp_spte_ret on every iteration goes away.

Paolo
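
A sketch of how the loop could look under Paolo's alternative, assuming
kvm_flush_remote_tlbs_with_list() itself returns early on an empty list and
falls back to kvm_flush_remote_tlbs() when ranged flushing is unavailable:

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		int ret;

		/* ... gpte fetch and checks unchanged ... */

		ret = set_spte(vcpu, &sp->spt[i], pte_access,
			       PT_PAGE_TABLE_LEVEL, gfn,
			       spte_to_pfn(sp->spt[i]),
			       true, false, host_writable);

		/* Build the flush list without checking
		 * kvm_available_flush_tlb_with_range(); whether and how to
		 * flush is decided by the helper after the loop. */
		if (ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) {
			struct kvm_mmu_page *leaf_sp =
				page_header(sp->spt[i] & PT64_BASE_ADDR_MASK);

			list_add(&leaf_sp->flush_link, &flush_list);
		}
	}

	/* Always called; a no-op when flush_list is empty. */
	kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);

In this form the set_spte_ret accumulation disappears as well, since its only
consumer in sync_page() was the final flush decision.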

Patch

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 833e8855bbc9..866ccdea762e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -973,6 +973,7 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	bool host_writable;
 	gpa_t first_pte_gpa;
 	int set_spte_ret = 0;
+	LIST_HEAD(flush_list);
 
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
@@ -980,6 +981,7 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+		int tmp_spte_ret = 0;
 		unsigned pte_access;
 		pt_element_t gpte;
 		gpa_t pte_gpa;
@@ -1029,14 +1031,24 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
-		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
+		tmp_spte_ret = set_spte(vcpu, &sp->spt[i],
 					 pte_access, PT_PAGE_TABLE_LEVEL,
 					 gfn, spte_to_pfn(sp->spt[i]),
 					 true, false, host_writable);
+
+		if (kvm_available_flush_tlb_with_range()
+		    && (tmp_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)) {
+			struct kvm_mmu_page *leaf_sp = page_header(sp->spt[i]
+					& PT64_BASE_ADDR_MASK);
+			list_add(&leaf_sp->flush_link, &flush_list);
+		}
+
+		set_spte_ret |= tmp_spte_ret;
+
 	}
 
 	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
-		kvm_flush_remote_tlbs(vcpu->kvm);
+		kvm_flush_remote_tlbs_with_list(vcpu->kvm, &flush_list);
 
 	return nr_present;
 }