Patchwork [08/14] Revert "KVM: MMU: collapse TLB flushes when zap all pages"

Submitter Christopherson, Sean J
Date Jan. 10, 2019, 6:07 p.m.
Message ID <20190110180706.24974-9-sean.j.christopherson@intel.com>
Permalink /patch/697169/
State New

Comments

Christopherson, Sean J - Jan. 10, 2019, 6:07 p.m.
Unwinding optimizations related to obsolete pages is a step towards
removing x86 KVM's fast invalidate mechanism, i.e. this is one part of
a revert of all patches from the series that introduced the mechanism[1].

This reverts commit f34d251d66ba263c077ed9d2bbd1874339a4c887.

[1] https://lkml.kernel.org/r/1369960590-14138-1-git-send-email-xiaoguangrong@linux.vnet.ibm.com

Cc: Xiao Guangrong <guangrong.xiao@gmail.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/mmu.c | 31 +++----------------------------
 1 file changed, 3 insertions(+), 28 deletions(-)
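
For context, the "fast invalidate" mechanism being unwound tags every shadow
page with a generation number; pages whose generation is stale are "obsolete"
and are zapped lazily. The check at the heart of it, as it appears in mmu.c
of this era, is roughly:

static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
}

The reverted commit relied on the generation bump plus a forced vCPU reload
(kvm_reload_remote_mmus()) to make per-batch TLB flushes unnecessary; this
revert drops the reload and restores a flush, via kvm_mmu_commit_zap_page(),
before every point where mmu_lock may be dropped.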
Paolo Bonzini - Jan. 30, 2019, 4:54 p.m.
On 10/01/19 19:07, Sean Christopherson wrote:
> Unwinding optimizations related to obsolete pages is a step towards
> removing x86 KVM's fast invalidate mechanism, i.e. this is one part of
> a revert of all patches from the series that introduced the mechanism[1].
> 
> This reverts commit f34d251d66ba263c077ed9d2bbd1874339a4c887.
> 
> [1] https://lkml.kernel.org/r/1369960590-14138-1-git-send-email-xiaoguangrong@linux.vnet.ibm.com
> 
> Cc: Xiao Guangrong <guangrong.xiao@gmail.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/mmu.c | 31 +++----------------------------
>  1 file changed, 3 insertions(+), 28 deletions(-)

I am a bit wary of removing this patch; more precisely, a similar
technique should be applied to kvm_mmu_zap_all after patch 12.

kvm_mmu_zap_all happens when destroying a VM, and I'm afraid that huge
VMs could spend a lot of time in a non-preemptible section in that case.

Paolo
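
As a rough illustration of what Paolo is suggesting — this is not code from
the series — the batch-and-commit pattern that this revert restores in
kvm_zap_obsolete_pages() could be carried over to the teardown path along
the following lines, assuming the era's helpers kvm_mmu_prepare_zap_page()
and kvm_mmu_commit_zap_page() and the BATCH_ZAP_PAGES constant visible in
the diff below:

void kvm_mmu_zap_all(struct kvm *kvm)
{
	struct kvm_mmu_page *sp, *node;
	LIST_HEAD(invalid_list);
	int batch = 0;

	spin_lock(&kvm->mmu_lock);
restart:
	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
		int ret;

		/*
		 * Commit (and thus TLB-flush) the current batch before
		 * mmu_lock can be dropped: lockless walkers may still be
		 * using the zapped page tables until the flush.
		 */
		if (batch >= BATCH_ZAP_PAGES &&
		    (need_resched() || spin_needbreak(&kvm->mmu_lock))) {
			batch = 0;
			kvm_mmu_commit_zap_page(kvm, &invalid_list);
			cond_resched_lock(&kvm->mmu_lock);
			goto restart;
		}

		ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
		batch += ret;
		if (ret)
			goto restart;
	}

	kvm_mmu_commit_zap_page(kvm, &invalid_list);
	spin_unlock(&kvm->mmu_lock);
}

Committing the invalid list before cond_resched_lock() keeps each
non-preemptible stretch bounded by roughly BATCH_ZAP_PAGES pages while
still flushing the TLB before any zapped page can be freed.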

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index df76255c8568..178b47de011f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2202,14 +2202,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
-/*
- * NOTE: we should pay more attention on the zapped-obsolete page
- * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk
- * since it has been deleted from active_mmu_pages but still can be found
- * at hast list.
- *
- * for_each_valid_sp() has skipped that kind of pages.
- */
 #define for_each_valid_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -5871,13 +5863,11 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
 		if (sp->role.invalid)
 			continue;
 
-		/*
-		 * Need not flush tlb since we only zap the sp with invalid
-		 * generation number.
-		 */
 		if (batch >= BATCH_ZAP_PAGES &&
-		      cond_resched_lock(&kvm->mmu_lock)) {
+		      (need_resched() || spin_needbreak(&kvm->mmu_lock))) {
 			batch = 0;
+			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+			cond_resched_lock(&kvm->mmu_lock);
 			goto restart;
 		}
 
@@ -5888,10 +5878,6 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
 			goto restart;
 	}
 
-	/*
-	 * Should flush tlb before free page tables since lockless-walking
-	 * may use the pages.
-	 */
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
 }
 
@@ -5910,17 +5896,6 @@ void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
 	trace_kvm_mmu_invalidate_zap_all_pages(kvm);
 	kvm->arch.mmu_valid_gen++;
 
-	/*
-	 * Notify all vcpus to reload its shadow page table
-	 * and flush TLB. Then all vcpus will switch to new
-	 * shadow page table with the new mmu_valid_gen.
-	 *
-	 * Note: we should do this under the protection of
-	 * mmu-lock, otherwise, vcpu would purge shadow page
-	 * but miss tlb flush.
-	 */
-	kvm_reload_remote_mmus(kvm);
-
 	kvm_zap_obsolete_pages(kvm);
 	spin_unlock(&kvm->mmu_lock);
 }