Patchwork iommu: Add a quirk for ARM Mali midgard MMU

login
register
mail settings
Submitter Rob Herring
Date Feb. 26, 2019, 6:17 p.m.
Message ID <20190226181716.14556-1-robh@kernel.org>
Download mbox | patch
Permalink /patch/736329/
State New
Headers show

Comments

Rob Herring - Feb. 26, 2019, 6:17 p.m.
ARM Mali midgard GPUs have a few differences from standard 64-bit
stage 1 page tables.

The 3rd level page entry bits are 0x1 instead of 0x3 for page entries.

The access flags are not read-only and unprivileged, but read and write.
This is similar to stage 2 entries, but the memory attributes field matches
stage 1 being an index.

The nG bit is also not set by the vendor driver, but that one doesn't
seem to matter.

Add a quirk to handle all of these differences.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: iommu@lists.linux-foundation.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++------------
 include/linux/io-pgtable.h     |  4 +++
 2 files changed, 37 insertions(+), 18 deletions(-)
Robin Murphy - Feb. 26, 2019, 7:25 p.m.
Hi Rob,

On 26/02/2019 18:17, Rob Herring wrote:
> ARM Mali midgard GPUs have a few differences from standard 64-bit
> stage 1 page tables.
> 
> The 3rd level page entry bits are 0x1 instead of 0x3 for page entries.
> 
> The access flags are not read-only and unprivileged, but read and write.
> This is similar to stage 2 entries, but the memory attributes field matches
> stage 1 being an index.
> 
> The nG bit is also not set by the vendor driver, but that one doesn't
> seem to matter.
> 
> Add a quirk to handle all of these differences.

 From the look of these changes, this isn't a quirk but a distinct 
format. AFAICS from the mali_kbase driver, this must be "LPAE mode" 
rather than "AArch64 mode", so it seems unlikely that it really supports 
the full VMSAv8 gamut of granules, address sizes, and page sizes that 
this patch will happily let through.

Robin.

> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Robin Murphy <robin.murphy@arm.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: iommu@lists.linux-foundation.org
> Signed-off-by: Rob Herring <robh@kernel.org>
> ---
>   drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++------------
>   include/linux/io-pgtable.h     |  4 +++
>   2 files changed, 37 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index d3700ec15cbd..ff6b29fdf38f 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -180,11 +180,6 @@
>   
>   #define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)
>   
> -#define iopte_leaf(pte,l)					\
> -	(l == (ARM_LPAE_MAX_LEVELS - 1) ?			\
> -		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) :	\
> -		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
> -
>   struct arm_lpae_io_pgtable {
>   	struct io_pgtable	iop;
>   
> @@ -198,6 +193,15 @@ struct arm_lpae_io_pgtable {
>   
>   typedef u64 arm_lpae_iopte;
>   
> +static inline bool iopte_leaf(arm_lpae_iopte pte, int l, unsigned long quirks)
> +{
> +	if ((l == (ARM_LPAE_MAX_LEVELS - 1)) &&
> +	    !(quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD))
> +		return iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE;
> +
> +	return iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK;
> +}
> +
>   static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
>   				     struct arm_lpae_io_pgtable *data)
>   {
> @@ -304,11 +308,14 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>   		pte |= ARM_LPAE_PTE_NS;
>   
>   	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
> -		pte |= ARM_LPAE_PTE_TYPE_PAGE;
> +		pte |= (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD) ?
> +			ARM_LPAE_PTE_TYPE_BLOCK : ARM_LPAE_PTE_TYPE_PAGE;
>   	else
>   		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
>   
> -	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
> +	if (!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD))
> +		pte |= ARM_LPAE_PTE_AF;
> +	pte |= ARM_LPAE_PTE_SH_IS;
>   	pte |= paddr_to_iopte(paddr, data);
>   
>   	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
> @@ -321,7 +328,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
>   {
>   	arm_lpae_iopte pte = *ptep;
>   
> -	if (iopte_leaf(pte, lvl)) {
> +	if (iopte_leaf(pte, lvl, data->iop.cfg.quirks)) {
>   		/* We require an unmap first */
>   		WARN_ON(!selftest_running);
>   		return -EEXIST;
> @@ -409,7 +416,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
>   		__arm_lpae_sync_pte(ptep, cfg);
>   	}
>   
> -	if (pte && !iopte_leaf(pte, lvl)) {
> +	if (pte && !iopte_leaf(pte, lvl, cfg->quirks)) {
>   		cptep = iopte_deref(pte, data);
>   	} else if (pte) {
>   		/* We require an unmap first */
> @@ -430,12 +437,19 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
>   	    data->iop.fmt == ARM_32_LPAE_S1) {
>   		pte = ARM_LPAE_PTE_nG;
>   
> -		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
> -			pte |= ARM_LPAE_PTE_AP_RDONLY;
> +		if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD) {
> +			if (prot & IOMMU_WRITE)
> +				pte |= ARM_LPAE_PTE_AP_RDONLY;
>   
> -		if (!(prot & IOMMU_PRIV))
> -			pte |= ARM_LPAE_PTE_AP_UNPRIV;
> +			if (prot & IOMMU_READ)
> +				pte |= ARM_LPAE_PTE_AP_UNPRIV;
> +		} else {
> +			if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
> +				pte |= ARM_LPAE_PTE_AP_RDONLY;
>   
> +			if (!(prot & IOMMU_PRIV))
> +				pte |= ARM_LPAE_PTE_AP_UNPRIV;
> +		}
>   		if (prot & IOMMU_MMIO)
>   			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
>   				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
> @@ -511,7 +525,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
>   	while (ptep != end) {
>   		arm_lpae_iopte pte = *ptep++;
>   
> -		if (!pte || iopte_leaf(pte, lvl))
> +		if (!pte || iopte_leaf(pte, lvl, data->iop.cfg.quirks))
>   			continue;
>   
>   		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
> @@ -602,7 +616,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
>   	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
>   		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
>   
> -		if (!iopte_leaf(pte, lvl)) {
> +		if (!iopte_leaf(pte, lvl, iop->cfg.quirks)) {
>   			/* Also flush any partial walks */
>   			io_pgtable_tlb_add_flush(iop, iova, size,
>   						ARM_LPAE_GRANULE(data), false);
> @@ -621,7 +635,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
>   		}
>   
>   		return size;
> -	} else if (iopte_leaf(pte, lvl)) {
> +	} else if (iopte_leaf(pte, lvl, iop->cfg.quirks)) {
>   		/*
>   		 * Insert a table at the next level to map the old region,
>   		 * minus the part we want to unmap
> @@ -669,7 +683,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>   			return 0;
>   
>   		/* Leaf entry? */
> -		if (iopte_leaf(pte,lvl))
> +		if (iopte_leaf(pte,lvl, data->iop.cfg.quirks))
>   			goto found_translation;
>   
>   		/* Take it to the next level */
> @@ -779,7 +793,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
>   	struct arm_lpae_io_pgtable *data;
>   
>   	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
> -			    IO_PGTABLE_QUIRK_NON_STRICT))
> +			    IO_PGTABLE_QUIRK_NON_STRICT |
> +			    IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD))
>   		return NULL;
>   
>   	data = arm_lpae_alloc_pgtable(cfg);
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 47d5ae559329..eed037423331 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -75,6 +75,9 @@ struct io_pgtable_cfg {
>   	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
>   	 *	on unmap, for DMA domains using the flush queue mechanism for
>   	 *	delayed invalidation.
> +	 *
> +	 * IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD: ARM Mali Midgard MMU has different
> +	 *	mapping of access flags and PTE page bits.
>   	 */
>   	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
>   	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
> @@ -82,6 +85,7 @@ struct io_pgtable_cfg {
>   	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
>   	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
>   	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(5)
> +	#define IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD BIT(6)
>   	unsigned long			quirks;
>   	unsigned long			pgsize_bitmap;
>   	unsigned int			ias;
>
Rob Herring - Feb. 26, 2019, 8:05 p.m.
On Tue, Feb 26, 2019 at 1:25 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> Hi Rob,
>
> On 26/02/2019 18:17, Rob Herring wrote:
> > ARM Mali midgard GPUs have a few differences from standard 64-bit
> > stage 1 page tables.
> >
> > The 3rd level page entry bits are 0x1 instead of 0x3 for page entries.
> >
> > The access flags are not read-only and unprivileged, but read and write.
> > This is similar to stage 2 entries, but the memory attributes field matches
> > stage 1 being an index.
> >
> > The nG bit is also not set by the vendor driver, but that one doesn't
> > seem to matter.
> >
> > Add a quirk to handle all of these differences.
>
>  From the look of these changes, this isn't a quirk but a distinct
> format. AFAICS from the mali_kbase driver, this must be "LPAE mode"
> rather than "AArch64 mode", so it seems unlikely that it really supports
> the full VMSAv8 gamut of granules, address sizes, and page sizes that
> this patch will happily let through.

Right, but the page size bitmap and the in and out address sizes in
the config struct should be enough to restrict those.

What do you propose? Add another init function which hardcodes all
those or add some checks of allowed settings?

Rob
Robin Murphy - Feb. 27, 2019, 12:54 a.m.
On 2019-02-26 8:05 pm, Rob Herring wrote:
> On Tue, Feb 26, 2019 at 1:25 PM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> Hi Rob,
>>
>> On 26/02/2019 18:17, Rob Herring wrote:
>>> ARM Mali midgard GPUs have a few differences from standard 64-bit
>>> stage 1 page tables.
>>>
>>> The 3rd level page entry bits are 0x1 instead of 0x3 for page entries.
>>>
>>> The access flags are not read-only and unprivileged, but read and write.
>>> This is similar to stage 2 entries, but the memory attributes field matches
>>> stage 1 being an index.
>>>
>>> The nG bit is also not set by the vendor driver, but that one doesn't
>>> seem to matter.
>>>
>>> Add a quirk to handle all of these differences.
>>
>>   From the look of these changes, this isn't a quirk but a distinct
>> format. AFAICS from the mali_kbase driver, this must be "LPAE mode"
>> rather than "AArch64 mode", so it seems unlikely that it really supports
>> the full VMSAv8 gamut of granules, address sizes, and page sizes that
>> this patch will happily let through.
> 
> Right, but the page size bitmap and the in and out address sizes in
> the config struct should be enough to restrict those.
> 
> What do you propose? Add another init function which hardcodes all
> those or add some checks of allowed settings?

Or rather somewhere in between, i.e. add a new io_pgtable_fmt with an 
init function which sanity checks/constrains the request appropriately 
(like the existing ones do), and which could at least help make 
arm_lpae_pte_to_prot() less of a mess.

We don't treat v7 LPAE as a quirk of v8 AArch64, even though we 
implement it as a strict subset, and neither do we treat stage 2 as a 
quirk of stage 1 (or vice versa). This Midgard "LPAE" format appears 
more different from any of the VMSA long-descriptor formats than they 
are from each other - it certainly doesn't seem like a case of someone 
aiming to implement AArch64 stage 1 but getting one or two details 
slightly wrong.

(I know I implemented Mediatek's Franken-short-descriptor as a set of 
quirks rather than introducing multiple v7s formats, but that's largely 
because it's the only real user of that code, and most of them are 
generic enough that they could well be common to other formats in future)

Robin.

Patch

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index d3700ec15cbd..ff6b29fdf38f 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -180,11 +180,6 @@ 
 
 #define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)
 
-#define iopte_leaf(pte,l)					\
-	(l == (ARM_LPAE_MAX_LEVELS - 1) ?			\
-		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) :	\
-		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
-
 struct arm_lpae_io_pgtable {
 	struct io_pgtable	iop;
 
@@ -198,6 +193,15 @@  struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+static inline bool iopte_leaf(arm_lpae_iopte pte, int l, unsigned long quirks)
+{
+	if ((l == (ARM_LPAE_MAX_LEVELS - 1)) &&
+	    !(quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD))
+		return iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE;
+
+	return iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK;
+}
+
 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
 				     struct arm_lpae_io_pgtable *data)
 {
@@ -304,11 +308,14 @@  static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 		pte |= ARM_LPAE_PTE_NS;
 
 	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
-		pte |= ARM_LPAE_PTE_TYPE_PAGE;
+		pte |= (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD) ?
+			ARM_LPAE_PTE_TYPE_BLOCK : ARM_LPAE_PTE_TYPE_PAGE;
 	else
 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
-	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
+	if (!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD))
+		pte |= ARM_LPAE_PTE_AF;
+	pte |= ARM_LPAE_PTE_SH_IS;
 	pte |= paddr_to_iopte(paddr, data);
 
 	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
@@ -321,7 +328,7 @@  static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 {
 	arm_lpae_iopte pte = *ptep;
 
-	if (iopte_leaf(pte, lvl)) {
+	if (iopte_leaf(pte, lvl, data->iop.cfg.quirks)) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
@@ -409,7 +416,7 @@  static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		__arm_lpae_sync_pte(ptep, cfg);
 	}
 
-	if (pte && !iopte_leaf(pte, lvl)) {
+	if (pte && !iopte_leaf(pte, lvl, cfg->quirks)) {
 		cptep = iopte_deref(pte, data);
 	} else if (pte) {
 		/* We require an unmap first */
@@ -430,12 +437,19 @@  static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 	    data->iop.fmt == ARM_32_LPAE_S1) {
 		pte = ARM_LPAE_PTE_nG;
 
-		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
-			pte |= ARM_LPAE_PTE_AP_RDONLY;
+		if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD) {
+			if (prot & IOMMU_WRITE)
+				pte |= ARM_LPAE_PTE_AP_RDONLY;
 
-		if (!(prot & IOMMU_PRIV))
-			pte |= ARM_LPAE_PTE_AP_UNPRIV;
+			if (prot & IOMMU_READ)
+				pte |= ARM_LPAE_PTE_AP_UNPRIV;
+		} else {
+			if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
+				pte |= ARM_LPAE_PTE_AP_RDONLY;
 
+			if (!(prot & IOMMU_PRIV))
+				pte |= ARM_LPAE_PTE_AP_UNPRIV;
+		}
 		if (prot & IOMMU_MMIO)
 			pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
 				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
@@ -511,7 +525,7 @@  static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
 	while (ptep != end) {
 		arm_lpae_iopte pte = *ptep++;
 
-		if (!pte || iopte_leaf(pte, lvl))
+		if (!pte || iopte_leaf(pte, lvl, data->iop.cfg.quirks))
 			continue;
 
 		__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
@@ -602,7 +616,7 @@  static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
 		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
 
-		if (!iopte_leaf(pte, lvl)) {
+		if (!iopte_leaf(pte, lvl, iop->cfg.quirks)) {
 			/* Also flush any partial walks */
 			io_pgtable_tlb_add_flush(iop, iova, size,
 						ARM_LPAE_GRANULE(data), false);
@@ -621,7 +635,7 @@  static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		}
 
 		return size;
-	} else if (iopte_leaf(pte, lvl)) {
+	} else if (iopte_leaf(pte, lvl, iop->cfg.quirks)) {
 		/*
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
@@ -669,7 +683,7 @@  static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 			return 0;
 
 		/* Leaf entry? */
-		if (iopte_leaf(pte,lvl))
+		if (iopte_leaf(pte,lvl, data->iop.cfg.quirks))
 			goto found_translation;
 
 		/* Take it to the next level */
@@ -779,7 +793,8 @@  arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	struct arm_lpae_io_pgtable *data;
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
-			    IO_PGTABLE_QUIRK_NON_STRICT))
+			    IO_PGTABLE_QUIRK_NON_STRICT |
+			    IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 47d5ae559329..eed037423331 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -75,6 +75,9 @@  struct io_pgtable_cfg {
 	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
 	 *	on unmap, for DMA domains using the flush queue mechanism for
 	 *	delayed invalidation.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD: ARM Mali Midgard MMU has different
+	 *	mapping of access flags and PTE page bits.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
@@ -82,6 +85,7 @@  struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
 	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
 	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(5)
+	#define IO_PGTABLE_QUIRK_ARM_MALI_MIDGARD BIT(6)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;