Patchwork [2/6] arm64: add sysfs vulnerability show for meltdown

login
register
mail settings
Submitter Jeremy Linton
Date Dec. 6, 2018, 11:44 p.m.
Message ID <20181206234408.1287689-3-jeremy.linton@arm.com>
Download mbox | patch
Permalink /patch/674839/
State New
Headers show

Comments

Jeremy Linton - Dec. 6, 2018, 11:44 p.m.
Add a simple state machine which will track whether
all the online cores in a machine are vulnerable.

Once that is done we have a fairly authoritative view
of the machine vulnerability, which allows us to make a
judgment about machine safety if it hasn't been mitigated.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)
Jeremy Linton - Dec. 12, 2018, 2:49 p.m.
Hi Julien,

Thanks for taking a look at this!

On 12/13/2018 04:46 AM, Julien Thierry wrote:
> 
> 
> On 13/12/2018 09:23, Julien Thierry wrote:
>> Hi Jeremy,
>>
>> On 06/12/2018 23:44, Jeremy Linton wrote:
>>> Add a simple state machine which will track whether
>>> all the online cores in a machine are vulnerable.
>>>
>>> Once that is done we have a fairly authoritative view
>>> of the machine vulnerability, which allows us to make a
>>> judgment about machine safety if it hasn't been mitigated.
>>>
>>> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
>>> ---
>>>   arch/arm64/kernel/cpufeature.c | 31 ++++++++++++++++++++++++++-----
>>>   1 file changed, 26 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>> index 242898395f68..bea9adfef7fa 100644
>>> --- a/arch/arm64/kernel/cpufeature.c
>>> +++ b/arch/arm64/kernel/cpufeature.c
>>> @@ -905,6 +905,8 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
>>>   	return has_cpuid_feature(entry, scope);
>>>   }
>>>   
>>> +static enum { A64_MELT_UNSET, A64_MELT_SAFE, A64_MELT_UNKN } __meltdown_safe = A64_MELT_UNSET;
>>> +
>>
>> I'm wondering, do we really need that tri state?
>>
>> Can't we consider that we are safe an move to unsafe/unkown if any cpu
>> during bring up is not in the safe list?
>>
>> The only user of this is cpu_show_meltdown, but I don't imagine it'll
>> get called before unmap_kernel_at_el0() is called for the boot CPU which
>> should initialise that state.
>>
>> Or is there another reason for having that UNSET state?
>>
> 
> Ok, I think I get the point of the UNSET as #ifndef
> CONFIG_UNMAP_KERNEL_AT_EL0 we don't set the state. But does that mean we
> always fall in the "Unknown" case when we don't build kpti in? Is that
> desirable?
> 
> If so, I'd suggest replacing the tri-state with the following change:
> 
> 
>>> +
>>> +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
>>> +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
>>> +		char *buf)
>>> +{
>>> +	if (arm64_kernel_unmapped_at_el0())
>>> +		return sprintf(buf, "Mitigation: KPTI\n");
>>> +
> 
> 	if (!IS_ENABLED(UNMAP_KERNEL_AT_EL0) || !meltdown_safe)
> 		sprintf(buf, "Unknown\n");
> 	else
> 		sprintf(buf, "Not affected\n");

If I'm understanding what you're suggesting:

Isn't this only checking the current core, rather than the whole 
machine? IIRC that was the fundamental complaint with the original set.
Julien Thierry - Dec. 13, 2018, 9:23 a.m.
Hi Jeremy,

On 06/12/2018 23:44, Jeremy Linton wrote:
> Add a simple state machine which will track whether
> all the online cores in a machine are vulnerable.
> 
> Once that is done we have a fairly authoritative view
> of the machine vulnerability, which allows us to make a
> judgment about machine safety if it hasn't been mitigated.
> 
> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
> ---
>  arch/arm64/kernel/cpufeature.c | 31 ++++++++++++++++++++++++++-----
>  1 file changed, 26 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 242898395f68..bea9adfef7fa 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -905,6 +905,8 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
>  	return has_cpuid_feature(entry, scope);
>  }
>  
> +static enum { A64_MELT_UNSET, A64_MELT_SAFE, A64_MELT_UNKN } __meltdown_safe = A64_MELT_UNSET;
> +

I'm wondering, do we really need that tri state?

Can't we consider that we are safe and move to unsafe/unknown if any cpu
during bring up is not in the safe list?

The only user of this is cpu_show_meltdown, but I don't imagine it'll
get called before unmap_kernel_at_el0() is called for the boot CPU which
should initialise that state.

Or is there another reason for having that UNSET state?

Thanks,

>  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
>  static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
>  
> @@ -928,6 +930,15 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
>  {
>  	char const *str = "command line option";
>  
> +	bool meltdown_safe = is_cpu_meltdown_safe() ||
> +		has_cpuid_feature(entry, scope);
> +
> +	/* Only safe if all booted cores are known safe */
> +	if (meltdown_safe && __meltdown_safe == A64_MELT_UNSET)
> +		__meltdown_safe = A64_MELT_SAFE;
> +	else if (!meltdown_safe)
> +		__meltdown_safe = A64_MELT_UNKN;
> +
>  	/*
>  	 * For reasons that aren't entirely clear, enabling KPTI on Cavium
>  	 * ThunderX leads to apparent I-cache corruption of kernel text, which
> @@ -949,11 +960,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
>  	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
>  		return true;
>  
> -	if (is_cpu_meltdown_safe())
> -		return false;
> -
> -	/* Defer to CPU feature registers */
> -	return !has_cpuid_feature(entry, scope);
> +	return !meltdown_safe;
>  }
>  
>  static void
> @@ -1920,3 +1927,17 @@ static int __init enable_mrs_emulation(void)
>  }
>  
>  core_initcall(enable_mrs_emulation);
> +
> +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
> +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
> +		char *buf)
> +{
> +	if (arm64_kernel_unmapped_at_el0())
> +		return sprintf(buf, "Mitigation: KPTI\n");
> +
> +	if (__meltdown_safe == A64_MELT_SAFE)
> +		return sprintf(buf, "Not affected\n");
> +
> +	return sprintf(buf, "Unknown\n");
> +}
> +#endif
>
Julien Thierry - Dec. 13, 2018, 10:46 a.m.
On 13/12/2018 09:23, Julien Thierry wrote:
> Hi Jeremy,
> 
> On 06/12/2018 23:44, Jeremy Linton wrote:
>> Add a simple state machine which will track whether
>> all the online cores in a machine are vulnerable.
>>
>> Once that is done we have a fairly authoritative view
>> of the machine vulnerability, which allows us to make a
>> judgment about machine safety if it hasn't been mitigated.
>>
>> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
>> ---
>>  arch/arm64/kernel/cpufeature.c | 31 ++++++++++++++++++++++++++-----
>>  1 file changed, 26 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index 242898395f68..bea9adfef7fa 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -905,6 +905,8 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
>>  	return has_cpuid_feature(entry, scope);
>>  }
>>  
>> +static enum { A64_MELT_UNSET, A64_MELT_SAFE, A64_MELT_UNKN } __meltdown_safe = A64_MELT_UNSET;
>> +
> 
> I'm wondering, do we really need that tri state?
> 
> Can't we consider that we are safe an move to unsafe/unkown if any cpu
> during bring up is not in the safe list?
> 
> The only user of this is cpu_show_meltdown, but I don't imagine it'll
> get called before unmap_kernel_at_el0() is called for the boot CPU which
> should initialise that state.
> 
> Or is there another reason for having that UNSET state?
> 

Ok, I think I get the point of the UNSET state: when
CONFIG_UNMAP_KERNEL_AT_EL0 is not defined, we never set the state. But does
that mean we always fall in the "Unknown" case when we don't build kpti in?
Is that desirable?

If so, I'd suggest replacing the tri-state with the following change:


>> +
>> +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
>> +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
>> +		char *buf)
>> +{
>> +	if (arm64_kernel_unmapped_at_el0())
>> +		return sprintf(buf, "Mitigation: KPTI\n");
>> +

	if (!IS_ENABLED(UNMAP_KERNEL_AT_EL0) || !meltdown_safe)
		sprintf(buf, "Unknown\n");
	else
		sprintf(buf, "Not affected\n");


Thanks,
Julien Thierry - Dec. 14, 2018, 8:55 a.m.
Hi Jeremy,

On 12/12/2018 14:49, Jeremy Linton wrote:
> Hi Julien,
> 
> Thanks for taking a look at this!
> 
> On 12/13/2018 04:46 AM, Julien Thierry wrote:
>>
>>
>> On 13/12/2018 09:23, Julien Thierry wrote:
>>> Hi Jeremy,
>>>
>>> On 06/12/2018 23:44, Jeremy Linton wrote:
>>>> Add a simple state machine which will track whether
>>>> all the online cores in a machine are vulnerable.
>>>>
>>>> Once that is done we have a fairly authoritative view
>>>> of the machine vulnerability, which allows us to make a
>>>> judgment about machine safety if it hasn't been mitigated.
>>>>
>>>> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
>>>> ---
>>>>   arch/arm64/kernel/cpufeature.c | 31 ++++++++++++++++++++++++++-----
>>>>   1 file changed, 26 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/kernel/cpufeature.c
>>>> b/arch/arm64/kernel/cpufeature.c
>>>> index 242898395f68..bea9adfef7fa 100644
>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>> @@ -905,6 +905,8 @@ has_useable_cnp(const struct
>>>> arm64_cpu_capabilities *entry, int scope)
>>>>       return has_cpuid_feature(entry, scope);
>>>>   }
>>>>   +static enum { A64_MELT_UNSET, A64_MELT_SAFE, A64_MELT_UNKN }
>>>> __meltdown_safe = A64_MELT_UNSET;
>>>> +
>>>
>>> I'm wondering, do we really need that tri state?
>>>
>>> Can't we consider that we are safe an move to unsafe/unkown if any cpu
>>> during bring up is not in the safe list?
>>>
>>> The only user of this is cpu_show_meltdown, but I don't imagine it'll
>>> get called before unmap_kernel_at_el0() is called for the boot CPU which
>>> should initialise that state.
>>>
>>> Or is there another reason for having that UNSET state?
>>>
>>
>> Ok, I think I get the point of the UNSET as #ifndef
>> CONFIG_UNMAP_KERNEL_AT_EL0 we don't set the state. But does that mean we
>> always fall in the "Unknown" case when we don't build kpti in? Is that
>> desirable?
>>
>> If so, I'd suggest replacing the tri-state with the following change:
>>
>>
>>>> +
>>>> +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
>>>> +ssize_t cpu_show_meltdown(struct device *dev, struct
>>>> device_attribute *attr,
>>>> +        char *buf)
>>>> +{
>>>> +    if (arm64_kernel_unmapped_at_el0())
>>>> +        return sprintf(buf, "Mitigation: KPTI\n");
>>>> +
>>
>>     if (!IS_ENABLED(UNMAP_KERNEL_AT_EL0) || !meltdown_safe)
>>         sprintf(buf, "Unknown\n");
>>     else
>>         sprintf(buf, "Not affected\n");
> 
> If I'm understanding what your suggesting:
> 
> Isn't this only checking the current core, rather than the whole
> machine? IIRC that was the fundamental complaint with the original set.
> 

Sorry, yes, I meant to check "!__meltdown_safe". Basically my suggestion
is to replace the static enum variable with a static bool and handle the
"UNSET" case on whether we built the mitigation.

Does that make sense?

However, there's still the same issue as with patch 4. If we don't build
the mitigation, we say that we don't know the status of the system. I
think it would be nice to be able to say that a system is safe even when
the mitigation is not built. People knowing they have a safe system
might be inclined to not build additional stuff they don't need.

Cheers.

Patch

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 242898395f68..bea9adfef7fa 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -905,6 +905,8 @@  has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
 	return has_cpuid_feature(entry, scope);
 }
 
+static enum { A64_MELT_UNSET, A64_MELT_SAFE, A64_MELT_UNKN } __meltdown_safe = A64_MELT_UNSET;
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
@@ -928,6 +930,15 @@  static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 {
 	char const *str = "command line option";
 
+	bool meltdown_safe = is_cpu_meltdown_safe() ||
+		has_cpuid_feature(entry, scope);
+
+	/* Only safe if all booted cores are known safe */
+	if (meltdown_safe && __meltdown_safe == A64_MELT_UNSET)
+		__meltdown_safe = A64_MELT_SAFE;
+	else if (!meltdown_safe)
+		__meltdown_safe = A64_MELT_UNKN;
+
 	/*
 	 * For reasons that aren't entirely clear, enabling KPTI on Cavium
 	 * ThunderX leads to apparent I-cache corruption of kernel text, which
@@ -949,11 +960,7 @@  static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
 		return true;
 
-	if (is_cpu_meltdown_safe())
-		return false;
-
-	/* Defer to CPU feature registers */
-	return !has_cpuid_feature(entry, scope);
+	return !meltdown_safe;
 }
 
 static void
@@ -1920,3 +1927,17 @@  static int __init enable_mrs_emulation(void)
 }
 
 core_initcall(enable_mrs_emulation);
+
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	if (arm64_kernel_unmapped_at_el0())
+		return sprintf(buf, "Mitigation: KPTI\n");
+
+	if (__meltdown_safe == A64_MELT_SAFE)
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Unknown\n");
+}
+#endif