Patchwork KVM: PPC: Book3S HV: Load LPID and Flush TLB on Secondary Radix Threads

login
register
mail settings
Submitter Suraj Jitindar Singh
Date March 15, 2019, 5:07 a.m.
Message ID <20190315050704.30736-1-sjitindarsingh@gmail.com>
Download mbox | patch
Permalink /patch/749289/
State New
Headers show

Comments

Suraj Jitindar Singh - March 15, 2019, 5:07 a.m.
When running with independent threads off the main thread is responsible
for collecting secondary threads on the core and giving them vcpus to
run. These secondary threads spin in kvm_no_guest until they are given a
vcore to run and then take the kvmppc_hv_entry entry path from
kvm_secondary_got_guest. However this entry path doesn't load the guest
LPID for a radix guest, assuming it has already been loaded in
kvmppc_radix_check_need_tlb_flush() which would be called from
kvmppc_run_core when running in independent threads on mode. This means
that the guest runs with the host LPID which leads to an unhandleable mmu
fault giving:

KVM: Got unsupported MMU fault
error: kvm run failed Bad address
NIP c000000000028d50   LR c00000000004cad4 CTR 0000000000000000 XER 0000000000000000 CPU#0
MSR 8000000002089033 HID0 0000000000000000  HF 8000000000000000 iidx 3 didx 3
TB 00000000 00000000 DECR 0
GPR00 c00000000004cad4 c00000003e543a10 c00000000105ea00 000000000000c800
GPR04 0000000000000002 0000000000000000 0000000000000000 0000000000000000
GPR08 0000000000000000 000000000000a001 00000000a77784ea 0000000002001001
GPR12 0000000000000000 c000000001240000 c000000000010b50 0000000000000000
GPR16 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20 0000000000000000 0000000000000000 0000000000000000 c00000003fe7c800
GPR24 0000000000000000 0000000000000004 c0000000011044f8 0000000000000001
GPR28 0000000000000001 0000000000000000 c0000000011044fc 000000000000c336
CR 48000424  [ G  L  -  -  -  G  E  G  ]             RES ffffffffffffffff
 SRR0 c00000000000e708  SRR1 8000000002009033    PVR 00000000004e1202 VRSAVE 0000000000000000
SPRG0 0000000000000000 SPRG1 c000000001240000  SPRG2 0000000000000000  SPRG3 0000000000000000
SPRG4 0000000000000000 SPRG5 0000000000000000  SPRG6 0000000000000000  SPRG7 0000000000000000
HSRR0 0000000000000000 HSRR1 0000000000000000
 CFAR 0000000000000000
 LPCR 0000000003d6f41f
 PTCR 0000000000000000   DAR 0000000000000000  DSISR 0000000000000000

Also we fail to perform the tlb flush if this vcpu has moved to this
core from another physical core which could result in stale tlb entries
being used.

To fix this we call kvmppc_radix_check_need_tlb_flush() from the
secondary entry path as well. This will both set the guest LPID and
check if the tlb needs to be flushed.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
---
 arch/powerpc/include/asm/kvm_ppc.h      |  2 ++
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 41 +++++----------------------------
 arch/powerpc/kvm/book3s_hv_builtin.c    | 32 +++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 25 ++++++++++++--------
 5 files changed, 57 insertions(+), 44 deletions(-)
Paul Mackerras - March 18, 2019, 5:33 a.m.
On Fri, Mar 15, 2019 at 04:07:04PM +1100, Suraj Jitindar Singh wrote:
> When running with independent threads off the main thread is responsible
> for collecting secondary threads on the core and giving them vcpus to
> run. These secondary threads spin in kvm_no_guest until they are given a
> vcore to run and then take the kvmppc_hv_entry entry path from
> kvm_secondary_got_guest. However this entry path doesn't load the guest
> LPID for a radix guest, assuming it has already been loaded in
> kvmppc_radix_check_need_tlb_flush() which would be called from
> kvmppc_run_core when running in independent threads on mode. This means
> that the guest runs with the host LPID which leads to an unhandleable mmu
> fault giving:
> 
> KVM: Got unsupported MMU fault
> error: kvm run failed Bad address
> NIP c000000000028d50   LR c00000000004cad4 CTR 0000000000000000 XER 0000000000000000 CPU#0
> MSR 8000000002089033 HID0 0000000000000000  HF 8000000000000000 iidx 3 didx 3
> TB 00000000 00000000 DECR 0
> GPR00 c00000000004cad4 c00000003e543a10 c00000000105ea00 000000000000c800
> GPR04 0000000000000002 0000000000000000 0000000000000000 0000000000000000
> GPR08 0000000000000000 000000000000a001 00000000a77784ea 0000000002001001
> GPR12 0000000000000000 c000000001240000 c000000000010b50 0000000000000000
> GPR16 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> GPR20 0000000000000000 0000000000000000 0000000000000000 c00000003fe7c800
> GPR24 0000000000000000 0000000000000004 c0000000011044f8 0000000000000001
> GPR28 0000000000000001 0000000000000000 c0000000011044fc 000000000000c336
> CR 48000424  [ G  L  -  -  -  G  E  G  ]             RES ffffffffffffffff
>  SRR0 c00000000000e708  SRR1 8000000002009033    PVR 00000000004e1202 VRSAVE 0000000000000000
> SPRG0 0000000000000000 SPRG1 c000000001240000  SPRG2 0000000000000000  SPRG3 0000000000000000
> SPRG4 0000000000000000 SPRG5 0000000000000000  SPRG6 0000000000000000  SPRG7 0000000000000000
> HSRR0 0000000000000000 HSRR1 0000000000000000
>  CFAR 0000000000000000
>  LPCR 0000000003d6f41f
>  PTCR 0000000000000000   DAR 0000000000000000  DSISR 0000000000000000
> 
> Also we fail to perform the tlb flush if this vcpu has moved to this
> core from another physical core which could result in stale tlb entries
> being used.
> 
> To fix this we call kvmppc_radix_check_need_tlb_flush() from the
> secondary entry path as well. This will both set the guest LPID and
> check if the tlb needs to be flushed.
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>

Couple of comments below...

> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 9ffc72ded73a..4af4401d50bd 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -557,6 +557,7 @@ int main(void)
>  	OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
>  	OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
>  	OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
> +	OFFSET(VCORE_PCPU, kvmppc_vcore, pcpu);

Not sure we need this, see below.

> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -2464,6 +2464,10 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
>  	else
>  		prev_cpu = vcpu->arch.prev_cpu;
>  
> +	/* TLB shared between threads so we use a single bit for all threads */
> +	pcpu = cpu_first_thread_sibling(pcpu);
> +	prev_cpu = cpu_first_thread_sibling(prev_cpu);

prev_cpu could be -1, and it's not clear what effect calling
cpu_first_thread_sibling() should have, or whether the result is
necessarily negative.  It's probably best to add an if (prev_cpu >= 0)
before that line.

> @@ -3044,7 +3015,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  	if (kvm_is_radix(vc->kvm)) {
>  		for (sub = 0; sub < core_info.n_subcores; ++sub)
>  			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
> -				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
> +				kvmppc_prepare_radix_vcpu(vcpu, pcpu + sub);

This change seems to be unnecessary since we would expect that
cpu_first_thread_sibling(pcpu + sub) == cpu_first_thread_sibling(pcpu)
for all of the subcores.  Is that not true, or is there something else
I've missed?  If so it would be good to explain it in the patch
description.

> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index a71e2fc00a4e..e1fb00e647a5 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -805,3 +805,35 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
>  		vcpu->arch.doorbell_request = 0;
>  	}
>  }
> +
> +void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
> +				       struct kvm_nested_guest *nested)
> +{
> +	cpumask_t *need_tlb_flush;
> +	int lpid;
> +
> +	if (!cpu_has_feature(CPU_FTR_HVMODE))
> +		return;
> +
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		pcpu = cpu_first_thread_sibling(pcpu);

I *think* this line is the only one that's different from the original
version.  It would help review if you say in the patch description
that you have moved the function so that it can be called from
built-in code, and what changes you have made to it on the way.

> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 9b8d50a7cbaf..41628b67b88c 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -369,14 +369,6 @@ kvm_secondary_got_guest:
>  	cmpdi	r6, 0
>  	beq	63f
>  BEGIN_FTR_SECTION
> -	ld	r0, KVM_SPLIT_RPR(r6)
> -	mtspr	SPRN_RPR, r0
> -	ld	r0, KVM_SPLIT_PMMAR(r6)
> -	mtspr	SPRN_PMMAR, r0
> -	ld	r0, KVM_SPLIT_LDBAR(r6)
> -	mtspr	SPRN_LDBAR, r0
> -	isync
> -FTR_SECTION_ELSE
>  	/* On P9 we use the split_info for coordinating LPCR changes */
>  	lwz	r4, KVM_SPLIT_DO_SET(r6)
>  	cmpwi	r4, 0
> @@ -384,8 +376,23 @@ FTR_SECTION_ELSE
>  	mr	r3, r6
>  	bl	kvmhv_p9_set_lpcr
>  	nop
> +	b	2f
>  1:
> -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
> +	ld	r3, VCORE_KVM(r5)	/* pointer to struct kvm */
> +	ld	r4, VCORE_PCPU(r5)	/* physical cpu number */

Isn't this always going to be paca->paca_index for the current CPU
thread?  Can't you just lhz r4, PACAPACAINDEX(r13) instead?  Though I
don't suppose it actually makes all that much difference.

Paul.

Patch

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index eb0d79f0ca45..0defe17ff3b3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -476,6 +476,8 @@  static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
 extern void kvm_hv_vm_activated(void);
 extern void kvm_hv_vm_deactivated(void);
 extern bool kvm_hv_mode_active(void);
+extern void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+					      struct kvm_nested_guest *nested);
 
 #else
 static inline void __init kvm_cma_reserve(void)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9ffc72ded73a..4af4401d50bd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -557,6 +557,7 @@  int main(void)
 	OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
 	OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
 	OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
+	OFFSET(VCORE_PCPU, kvmppc_vcore, pcpu);
 	OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
 	OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
 	OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5a066fc299e1..1fd0920ae57d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2464,6 +2464,10 @@  static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 	else
 		prev_cpu = vcpu->arch.prev_cpu;
 
+	/* TLB shared between threads so we use a single bit for all threads */
+	pcpu = cpu_first_thread_sibling(pcpu);
+	prev_cpu = cpu_first_thread_sibling(prev_cpu);
+
 	/*
 	 * With radix, the guest can do TLB invalidations itself,
 	 * and it could choose to use the local form (tlbiel) if
@@ -2477,9 +2481,7 @@  static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 	 * so we use a single bit in .need_tlb_flush for all 4 threads.
 	 */
 	if (prev_cpu != pcpu) {
-		if (prev_cpu >= 0 &&
-		    cpu_first_thread_sibling(prev_cpu) !=
-		    cpu_first_thread_sibling(pcpu))
+		if (prev_cpu >= 0)
 			radix_flush_cpu(kvm, prev_cpu, vcpu);
 		if (nested)
 			nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
@@ -2488,37 +2490,6 @@  static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 	}
 }
 
-static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
-					      struct kvm_nested_guest *nested)
-{
-	cpumask_t *need_tlb_flush;
-	int lpid;
-
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
-		return;
-
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		pcpu &= ~0x3UL;
-
-	if (nested) {
-		lpid = nested->shadow_lpid;
-		need_tlb_flush = &nested->need_tlb_flush;
-	} else {
-		lpid = kvm->arch.lpid;
-		need_tlb_flush = &kvm->arch.need_tlb_flush;
-	}
-
-	mtspr(SPRN_LPID, lpid);
-	isync();
-	smp_mb();
-
-	if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
-		radix__local_flush_tlb_lpid_guest(lpid);
-		/* Clear the bit after the TLB flush */
-		cpumask_clear_cpu(pcpu, need_tlb_flush);
-	}
-}
-
 static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
 	int cpu;
@@ -3044,7 +3015,7 @@  static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	if (kvm_is_radix(vc->kvm)) {
 		for (sub = 0; sub < core_info.n_subcores; ++sub)
 			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
-				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+				kvmppc_prepare_radix_vcpu(vcpu, pcpu + sub);
 	}
 
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index a71e2fc00a4e..e1fb00e647a5 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -805,3 +805,35 @@  void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
 		vcpu->arch.doorbell_request = 0;
 	}
 }
+
+void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+				       struct kvm_nested_guest *nested)
+{
+	cpumask_t *need_tlb_flush;
+	int lpid;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pcpu = cpu_first_thread_sibling(pcpu);
+
+	if (nested) {
+		lpid = nested->shadow_lpid;
+		need_tlb_flush = &nested->need_tlb_flush;
+	} else {
+		lpid = kvm->arch.lpid;
+		need_tlb_flush = &kvm->arch.need_tlb_flush;
+	}
+
+	mtspr(SPRN_LPID, lpid);
+	isync();
+	smp_mb();
+
+	if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+		radix__local_flush_tlb_lpid_guest(lpid);
+		/* Clear the bit after the TLB flush */
+		cpumask_clear_cpu(pcpu, need_tlb_flush);
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_radix_check_need_tlb_flush);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9b8d50a7cbaf..41628b67b88c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -369,14 +369,6 @@  kvm_secondary_got_guest:
 	cmpdi	r6, 0
 	beq	63f
 BEGIN_FTR_SECTION
-	ld	r0, KVM_SPLIT_RPR(r6)
-	mtspr	SPRN_RPR, r0
-	ld	r0, KVM_SPLIT_PMMAR(r6)
-	mtspr	SPRN_PMMAR, r0
-	ld	r0, KVM_SPLIT_LDBAR(r6)
-	mtspr	SPRN_LDBAR, r0
-	isync
-FTR_SECTION_ELSE
 	/* On P9 we use the split_info for coordinating LPCR changes */
 	lwz	r4, KVM_SPLIT_DO_SET(r6)
 	cmpwi	r4, 0
@@ -384,8 +376,23 @@  FTR_SECTION_ELSE
 	mr	r3, r6
 	bl	kvmhv_p9_set_lpcr
 	nop
+	b	2f
 1:
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+	ld	r3, VCORE_KVM(r5)	/* pointer to struct kvm */
+	ld	r4, VCORE_PCPU(r5)	/* physical cpu number */
+	li	r5, 0			/* nested = NULL */
+	bl	kvmppc_radix_check_need_tlb_flush
+	nop
+2:
+FTR_SECTION_ELSE
+	ld	r0, KVM_SPLIT_RPR(r6)
+	mtspr	SPRN_RPR, r0
+	ld	r0, KVM_SPLIT_PMMAR(r6)
+	mtspr	SPRN_PMMAR, r0
+	ld	r0, KVM_SPLIT_LDBAR(r6)
+	mtspr	SPRN_LDBAR, r0
+	isync
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 63:
 	/* Order load of vcpu after load of vcore */
 	lwsync