Patchwork [RFC,v3,01/21] iommu: Introduce set_pasid_table API

login
register
mail settings
Submitter Auger Eric
Date Jan. 8, 2019, 10:26 a.m.
Message ID <20190108102633.17482-2-eric.auger@redhat.com>
Download mbox | patch
Permalink /patch/694541/
State New
Headers show

Comments

Auger Eric - Jan. 8, 2019, 10:26 a.m.
From: Jacob Pan <jacob.jun.pan@linux.intel.com>

In virtualization use case, when a guest is assigned
a PCI host device, protected by a virtual IOMMU on a guest,
the physical IOMMU must be programmed to be consistent with
the guest mappings. If the physical IOMMU supports two
translation stages it makes sense to program guest mappings
onto the first stage/level (ARM/VTD terminology) while to host
owns the stage/level 2.

In that case, it is mandated to trap on guest configuration
settings and pass those to the physical iommu driver.

This patch adds a new API to the iommu subsystem that allows
to set the pasid table information.

A generic iommu_pasid_table_config struct is introduced in
a new iommu.h uapi header. This is going to be used by the VFIO
user API. We foresee at least two specializations of this struct,
for PASID table passing and ARM SMMUv3.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

This patch generalizes the API introduced by Jacob & co-authors in
https://lwn.net/Articles/754331/

v2 -> v3:
- replace unbind/bind by set_pasid_table
- move table pointer and pasid bits in the generic part of the struct

v1 -> v2:
- restore the original pasid table name
- remove the struct device * parameter in the API
- reworked iommu_pasid_smmuv3
---
 drivers/iommu/iommu.c      | 10 ++++++++
 include/linux/iommu.h      | 14 +++++++++++
 include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+)
 create mode 100644 include/uapi/linux/iommu.h
Jean-Philippe Brucker - Jan. 11, 2019, 6:16 p.m.
On 08/01/2019 10:26, Eric Auger wrote:
> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> 
> In virtualization use case, when a guest is assigned
> a PCI host device, protected by a virtual IOMMU on a guest,
> the physical IOMMU must be programmed to be consistent with
> the guest mappings. If the physical IOMMU supports two
> translation stages it makes sense to program guest mappings
> onto the first stage/level (ARM/VTD terminology) while to host
> owns the stage/level 2.
> 
> In that case, it is mandated to trap on guest configuration
> settings and pass those to the physical iommu driver.
> 
> This patch adds a new API to the iommu subsystem that allows
> to set the pasid table information.
> 
> A generic iommu_pasid_table_config struct is introduced in
> a new iommu.h uapi header. This is going to be used by the VFIO
> user API. We foresee at least two specializations of this struct,
> for PASID table passing and ARM SMMUv3.

Last sentence is a bit confusing. With SMMUv3 it is also used for the
PASID table, even when it only has one entry and PASID is disabled.

> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> This patch generalizes the API introduced by Jacob & co-authors in
> https://lwn.net/Articles/754331/
> 
> v2 -> v3:
> - replace unbind/bind by set_pasid_table
> - move table pointer and pasid bits in the generic part of the struct
> 
> v1 -> v2:
> - restore the original pasid table name
> - remove the struct device * parameter in the API
> - reworked iommu_pasid_smmuv3
> ---
>  drivers/iommu/iommu.c      | 10 ++++++++
>  include/linux/iommu.h      | 14 +++++++++++
>  include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 74 insertions(+)
>  create mode 100644 include/uapi/linux/iommu.h
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 3ed4db334341..0f2b7f1fc7c8 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>  
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +			  struct iommu_pasid_table_config *cfg)
> +{
> +	if (unlikely(!domain->ops->set_pasid_table))
> +		return -ENODEV;
> +
> +	return domain->ops->set_pasid_table(domain, cfg);
> +}
> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
>  				  struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index e90da6b6f3d1..1da2a2357ea4 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include <linux/errno.h>
>  #include <linux/err.h>
>  #include <linux/of.h>
> +#include <uapi/linux/iommu.h>
>  
>  #define IOMMU_READ	(1 << 0)
>  #define IOMMU_WRITE	(1 << 1)
> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>   * @domain_window_disable: Disable a particular window for a domain
>   * @of_xlate: add OF master IDs to iommu grouping
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @set_pasid_table: set pasid table
>   */
>  struct iommu_ops {
>  	bool (*capable)(enum iommu_cap);
> @@ -226,6 +228,9 @@ struct iommu_ops {
>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>  
> +	int (*set_pasid_table)(struct iommu_domain *domain,
> +			       struct iommu_pasid_table_config *cfg);
> +
>  	unsigned long pgsize_bitmap;
>  };
>  
> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>  			       struct device *dev);
>  extern void iommu_detach_device(struct iommu_domain *domain,
>  				struct device *dev);
> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
> +				 struct iommu_pasid_table_config *cfg);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>  	return NULL;
>  }
>  
> +static inline
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +			  struct iommu_pasid_table_config *cfg)
> +{
> +	return -ENODEV;
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index 000000000000..7a7cf7a3de7c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + *
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.

I don't think we need both the boilerplate and the SPDX header

> + */
> +
> +#ifndef _UAPI_IOMMU_H
> +#define _UAPI_IOMMU_H
> +
> +#include <linux/types.h>
> +
> +/**
> + * SMMUv3 Stream Table Entry stage 1 related information
> + * @abort: shall the STE lead to abort
> + * @s1fmt: STE s1fmt field as set by the guest
> + * @s1dss: STE s1dss as set by the guest
> + * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)

Not really the case for @abort. Could you clarify whether @abort is
valid in combination with @bypass?

> + */
> +struct iommu_pasid_smmuv3 {
> +	__u8 abort;
> +	__u8 s1fmt;
> +	__u8 s1dss;
> +};
> +
> +/**
> + * PASID table data used to bind guest PASID table to the host IOMMU
> + * Note PASID table corresponds to the Context Table on ARM SMMUv3.
> + *
> + * @version: API version to prepare for future extensions
> + * @format: format of the PASID table
> + *
> + */
> +struct iommu_pasid_table_config {
> +#define PASID_TABLE_CFG_VERSION_1 1
> +	__u32	version;
> +#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
> +	__u32	format;
> +	__u64	base_ptr;
> +	__u8	pasid_bits;
> +	__u8	bypass;

We need some padding, in case someone adds a new struct to the union
that requires 64-byte alignment

And 'bypass' might not be the right name if we're making it common,
maybe 'reset' would be clearer? Or we just need to explain that bypass
is the initial state of a nesting domain

Thanks,
Jean

> +	union {
> +		struct iommu_pasid_smmuv3 smmuv3;
> +	};
> +};
> +
> +#endif /* _UAPI_IOMMU_H */
>
Alex Williamson - Jan. 11, 2019, 6:43 p.m.
On Tue,  8 Jan 2019 11:26:13 +0100
Eric Auger <eric.auger@redhat.com> wrote:

> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> 
> In virtualization use case, when a guest is assigned
> a PCI host device, protected by a virtual IOMMU on a guest,
> the physical IOMMU must be programmed to be consistent with
> the guest mappings. If the physical IOMMU supports two
> translation stages it makes sense to program guest mappings
> onto the first stage/level (ARM/VTD terminology) while to host
> owns the stage/level 2.
> 
> In that case, it is mandated to trap on guest configuration
> settings and pass those to the physical iommu driver.
> 
> This patch adds a new API to the iommu subsystem that allows
> to set the pasid table information.
> 
> A generic iommu_pasid_table_config struct is introduced in
> a new iommu.h uapi header. This is going to be used by the VFIO
> user API. We foresee at least two specializations of this struct,
> for PASID table passing and ARM SMMUv3.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> This patch generalizes the API introduced by Jacob & co-authors in
> https://lwn.net/Articles/754331/
> 
> v2 -> v3:
> - replace unbind/bind by set_pasid_table
> - move table pointer and pasid bits in the generic part of the struct
> 
> v1 -> v2:
> - restore the original pasid table name
> - remove the struct device * parameter in the API
> - reworked iommu_pasid_smmuv3
> ---
>  drivers/iommu/iommu.c      | 10 ++++++++
>  include/linux/iommu.h      | 14 +++++++++++
>  include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 74 insertions(+)
>  create mode 100644 include/uapi/linux/iommu.h
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 3ed4db334341..0f2b7f1fc7c8 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>  
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +			  struct iommu_pasid_table_config *cfg)
> +{
> +	if (unlikely(!domain->ops->set_pasid_table))
> +		return -ENODEV;
> +
> +	return domain->ops->set_pasid_table(domain, cfg);
> +}
> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
>  				  struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index e90da6b6f3d1..1da2a2357ea4 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include <linux/errno.h>
>  #include <linux/err.h>
>  #include <linux/of.h>
> +#include <uapi/linux/iommu.h>
>  
>  #define IOMMU_READ	(1 << 0)
>  #define IOMMU_WRITE	(1 << 1)
> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>   * @domain_window_disable: Disable a particular window for a domain
>   * @of_xlate: add OF master IDs to iommu grouping
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @set_pasid_table: set pasid table
>   */
>  struct iommu_ops {
>  	bool (*capable)(enum iommu_cap);
> @@ -226,6 +228,9 @@ struct iommu_ops {
>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>  
> +	int (*set_pasid_table)(struct iommu_domain *domain,
> +			       struct iommu_pasid_table_config *cfg);
> +
>  	unsigned long pgsize_bitmap;
>  };
>  
> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>  			       struct device *dev);
>  extern void iommu_detach_device(struct iommu_domain *domain,
>  				struct device *dev);
> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
> +				 struct iommu_pasid_table_config *cfg);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>  	return NULL;
>  }
>  
> +static inline
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +			  struct iommu_pasid_table_config *cfg)
> +{
> +	return -ENODEV;
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index 000000000000..7a7cf7a3de7c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + *
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef _UAPI_IOMMU_H
> +#define _UAPI_IOMMU_H
> +
> +#include <linux/types.h>
> +
> +/**
> + * SMMUv3 Stream Table Entry stage 1 related information
> + * @abort: shall the STE lead to abort
> + * @s1fmt: STE s1fmt field as set by the guest
> + * @s1dss: STE s1dss as set by the guest
> + * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)
> + */
> +struct iommu_pasid_smmuv3 {
> +	__u8 abort;
> +	__u8 s1fmt;
> +	__u8 s1dss;
> +};
> +

I can find STE.S1DSS and STE.S1FMT in the spec, but not STE.ABORT, is
this something to do with Config[2:0]?  Are we allowed to describe what
these fields are beyond their name and why they're necessary here vs
the other fields or do the spec restrictions preclude that?

> +/**
> + * PASID table data used to bind guest PASID table to the host IOMMU
> + * Note PASID table corresponds to the Context Table on ARM SMMUv3.
> + *
> + * @version: API version to prepare for future extensions
> + * @format: format of the PASID table
> + *
> + */
> +struct iommu_pasid_table_config {
> +#define PASID_TABLE_CFG_VERSION_1 1
> +	__u32	version;
> +#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
> +	__u32	format;
> +	__u64	base_ptr;
> +	__u8	pasid_bits;
> +	__u8	bypass;
> +	union {
> +		struct iommu_pasid_smmuv3 smmuv3;
> +	};
> +};

Structure is not naturally aligned or explicitly aligned for
interchange with userspace.  It might work for smmuv3 since the
structure is only composed of bytes, but looks troublesome in general.
Should each format type also contain a version?  Is format intended to
be a bit-field or a signature?  It seems we only need a signature, but
only having a single format defined, it looks like a bit-field, which
makes me worry what we do when we exhaust the bits.  The bypass field
should be better defined, is it 0/1?  zero/non-zero?  more selective?
Thanks,

Alex

> +
> +#endif /* _UAPI_IOMMU_H */
Auger Eric - Jan. 25, 2019, 8:39 a.m.
Hi Jean-Philippe,

On 1/11/19 7:16 PM, Jean-Philippe Brucker wrote:
> On 08/01/2019 10:26, Eric Auger wrote:
>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>
>> In virtualization use case, when a guest is assigned
>> a PCI host device, protected by a virtual IOMMU on a guest,
>> the physical IOMMU must be programmed to be consistent with
>> the guest mappings. If the physical IOMMU supports two
>> translation stages it makes sense to program guest mappings
>> onto the first stage/level (ARM/VTD terminology) while to host
>> owns the stage/level 2.
>>
>> In that case, it is mandated to trap on guest configuration
>> settings and pass those to the physical iommu driver.
>>
>> This patch adds a new API to the iommu subsystem that allows
>> to set the pasid table information.
>>
>> A generic iommu_pasid_table_config struct is introduced in
>> a new iommu.h uapi header. This is going to be used by the VFIO
>> user API. We foresee at least two specializations of this struct,
>> for PASID table passing and ARM SMMUv3.
> 
> Last sentence is a bit confusing. With SMMUv3 it is also used for the
> PASID table, even when it only has one entry and PASID is disabled.
OK removed
> 
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> This patch generalizes the API introduced by Jacob & co-authors in
>> https://lwn.net/Articles/754331/
>>
>> v2 -> v3:
>> - replace unbind/bind by set_pasid_table
>> - move table pointer and pasid bits in the generic part of the struct
>>
>> v1 -> v2:
>> - restore the original pasid table name
>> - remove the struct device * parameter in the API
>> - reworked iommu_pasid_smmuv3
>> ---
>>  drivers/iommu/iommu.c      | 10 ++++++++
>>  include/linux/iommu.h      | 14 +++++++++++
>>  include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 74 insertions(+)
>>  create mode 100644 include/uapi/linux/iommu.h
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 3ed4db334341..0f2b7f1fc7c8 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>>  }
>>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>>  
>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>> +			  struct iommu_pasid_table_config *cfg)
>> +{
>> +	if (unlikely(!domain->ops->set_pasid_table))
>> +		return -ENODEV;
>> +
>> +	return domain->ops->set_pasid_table(domain, cfg);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
>> +
>>  static void __iommu_detach_device(struct iommu_domain *domain,
>>  				  struct device *dev)
>>  {
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index e90da6b6f3d1..1da2a2357ea4 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -25,6 +25,7 @@
>>  #include <linux/errno.h>
>>  #include <linux/err.h>
>>  #include <linux/of.h>
>> +#include <uapi/linux/iommu.h>
>>  
>>  #define IOMMU_READ	(1 << 0)
>>  #define IOMMU_WRITE	(1 << 1)
>> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>>   * @domain_window_disable: Disable a particular window for a domain
>>   * @of_xlate: add OF master IDs to iommu grouping
>>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>> + * @set_pasid_table: set pasid table
>>   */
>>  struct iommu_ops {
>>  	bool (*capable)(enum iommu_cap);
>> @@ -226,6 +228,9 @@ struct iommu_ops {
>>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>>  
>> +	int (*set_pasid_table)(struct iommu_domain *domain,
>> +			       struct iommu_pasid_table_config *cfg);
>> +
>>  	unsigned long pgsize_bitmap;
>>  };
>>  
>> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>>  			       struct device *dev);
>>  extern void iommu_detach_device(struct iommu_domain *domain,
>>  				struct device *dev);
>> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
>> +				 struct iommu_pasid_table_config *cfg);
>>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
>> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>>  	return NULL;
>>  }
>>  
>> +static inline
>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>> +			  struct iommu_pasid_table_config *cfg)
>> +{
>> +	return -ENODEV;
>> +}
>> +
>>  #endif /* CONFIG_IOMMU_API */
>>  
>>  #ifdef CONFIG_IOMMU_DEBUGFS
>> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
>> new file mode 100644
>> index 000000000000..7a7cf7a3de7c
>> --- /dev/null
>> +++ b/include/uapi/linux/iommu.h
>> @@ -0,0 +1,50 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +/*
>> + * IOMMU user API definitions
>> + *
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
> 
> I don't think we need both the boilerplate and the SPDX header
OK I only kept the SPDX header.
> 
>> + */
>> +
>> +#ifndef _UAPI_IOMMU_H
>> +#define _UAPI_IOMMU_H
>> +
>> +#include <linux/types.h>
>> +
>> +/**
>> + * SMMUv3 Stream Table Entry stage 1 related information
>> + * @abort: shall the STE lead to abort
>> + * @s1fmt: STE s1fmt field as set by the guest
>> + * @s1dss: STE s1dss as set by the guest
>> + * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)
> 
> Not really the case for @abort. Could you clarify whether @abort is
> valid in combination with @bypass?
> abort corresponds to !Config[2]. In that case the spec says "report
abort to device, no event recorded". S1 bypass corresponds to
Config=0b1x0. What about removing abort in the SMMUv3 specific part and
encode the stage state in the generic part. See below proposal ...
>> + */
>> +struct iommu_pasid_smmuv3 {
>> +	__u8 abort;
>> +	__u8 s1fmt;
>> +	__u8 s1dss;
>> +};
>> +
>> +/**
>> + * PASID table data used to bind guest PASID table to the host IOMMU
>> + * Note PASID table corresponds to the Context Table on ARM SMMUv3.
>> + *
>> + * @version: API version to prepare for future extensions
>> + * @format: format of the PASID table
>> + *
>> + */
>> +struct iommu_pasid_table_config {
>> +#define PASID_TABLE_CFG_VERSION_1 1
>> +	__u32	version;
>> +#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
>> +	__u32	format;
>> +	__u64	base_ptr;
>> +	__u8	pasid_bits;
>> +	__u8	bypass
#define IOMMU_PASID_STREAM_ABORT  (1 << 0)
#define IOMMU_PASID_STREAM_BYPASS (1 << 1)
#define IOMMU_PASID_STREAM_TRANSLATE (1 << 2)
__u8 config;
> 
> We need some padding, in case someone adds a new struct to the union
> that requires 64-byte alignment
OK
> 
> And 'bypass' might not be the right name if we're making it common,
> maybe 'reset' would be clearer? Or we just need to explain that bypass
> is the initial state of a nesting domain
I will add such comment. To me the "bypass" terminology sounds clearer
than "reset"
> 
> Thanks,
> Jean
> 
>> +	union {
>> +		struct iommu_pasid_smmuv3 smmuv3;
>> +	};
>> +};
>> +
>> +#endif /* _UAPI_IOMMU_H */
>>
>
Auger Eric - Jan. 25, 2019, 8:55 a.m.
Hi Jean-Philippe,

On 1/25/19 9:39 AM, Auger Eric wrote:
> Hi Jean-Philippe,
> 
> On 1/11/19 7:16 PM, Jean-Philippe Brucker wrote:
>> On 08/01/2019 10:26, Eric Auger wrote:
>>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>
>>> In virtualization use case, when a guest is assigned
>>> a PCI host device, protected by a virtual IOMMU on a guest,
>>> the physical IOMMU must be programmed to be consistent with
>>> the guest mappings. If the physical IOMMU supports two
>>> translation stages it makes sense to program guest mappings
>>> onto the first stage/level (ARM/VTD terminology) while to host
>>> owns the stage/level 2.
>>>
>>> In that case, it is mandated to trap on guest configuration
>>> settings and pass those to the physical iommu driver.
>>>
>>> This patch adds a new API to the iommu subsystem that allows
>>> to set the pasid table information.
>>>
>>> A generic iommu_pasid_table_config struct is introduced in
>>> a new iommu.h uapi header. This is going to be used by the VFIO
>>> user API. We foresee at least two specializations of this struct,
>>> for PASID table passing and ARM SMMUv3.
>>
>> Last sentence is a bit confusing. With SMMUv3 it is also used for the
>> PASID table, even when it only has one entry and PASID is disabled.
> OK removed
>>
>>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> ---
>>>
>>> This patch generalizes the API introduced by Jacob & co-authors in
>>> https://lwn.net/Articles/754331/
>>>
>>> v2 -> v3:
>>> - replace unbind/bind by set_pasid_table
>>> - move table pointer and pasid bits in the generic part of the struct
>>>
>>> v1 -> v2:
>>> - restore the original pasid table name
>>> - remove the struct device * parameter in the API
>>> - reworked iommu_pasid_smmuv3
>>> ---
>>>  drivers/iommu/iommu.c      | 10 ++++++++
>>>  include/linux/iommu.h      | 14 +++++++++++
>>>  include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 74 insertions(+)
>>>  create mode 100644 include/uapi/linux/iommu.h
>>>
>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>> index 3ed4db334341..0f2b7f1fc7c8 100644
>>> --- a/drivers/iommu/iommu.c
>>> +++ b/drivers/iommu/iommu.c
>>> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>>>  }
>>>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>>>  
>>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>>> +			  struct iommu_pasid_table_config *cfg)
>>> +{
>>> +	if (unlikely(!domain->ops->set_pasid_table))
>>> +		return -ENODEV;
>>> +
>>> +	return domain->ops->set_pasid_table(domain, cfg);
>>> +}
>>> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
>>> +
>>>  static void __iommu_detach_device(struct iommu_domain *domain,
>>>  				  struct device *dev)
>>>  {
>>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>>> index e90da6b6f3d1..1da2a2357ea4 100644
>>> --- a/include/linux/iommu.h
>>> +++ b/include/linux/iommu.h
>>> @@ -25,6 +25,7 @@
>>>  #include <linux/errno.h>
>>>  #include <linux/err.h>
>>>  #include <linux/of.h>
>>> +#include <uapi/linux/iommu.h>
>>>  
>>>  #define IOMMU_READ	(1 << 0)
>>>  #define IOMMU_WRITE	(1 << 1)
>>> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>>>   * @domain_window_disable: Disable a particular window for a domain
>>>   * @of_xlate: add OF master IDs to iommu grouping
>>>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>>> + * @set_pasid_table: set pasid table
>>>   */
>>>  struct iommu_ops {
>>>  	bool (*capable)(enum iommu_cap);
>>> @@ -226,6 +228,9 @@ struct iommu_ops {
>>>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>>>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>>>  
>>> +	int (*set_pasid_table)(struct iommu_domain *domain,
>>> +			       struct iommu_pasid_table_config *cfg);
>>> +
>>>  	unsigned long pgsize_bitmap;
>>>  };
>>>  
>>> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>>>  			       struct device *dev);
>>>  extern void iommu_detach_device(struct iommu_domain *domain,
>>>  				struct device *dev);
>>> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
>>> +				 struct iommu_pasid_table_config *cfg);
>>>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>>>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>>>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
>>> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>>>  	return NULL;
>>>  }
>>>  
>>> +static inline
>>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>>> +			  struct iommu_pasid_table_config *cfg)
>>> +{
>>> +	return -ENODEV;
>>> +}
>>> +
>>>  #endif /* CONFIG_IOMMU_API */
>>>  
>>>  #ifdef CONFIG_IOMMU_DEBUGFS
>>> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
>>> new file mode 100644
>>> index 000000000000..7a7cf7a3de7c
>>> --- /dev/null
>>> +++ b/include/uapi/linux/iommu.h
>>> @@ -0,0 +1,50 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>>> +/*
>>> + * IOMMU user API definitions
>>> + *
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License version 2 as
>>> + * published by the Free Software Foundation.
>>
>> I don't think we need both the boilerplate and the SPDX header
> OK I only kept the SPDX header.
>>
>>> + */
>>> +
>>> +#ifndef _UAPI_IOMMU_H
>>> +#define _UAPI_IOMMU_H
>>> +
>>> +#include <linux/types.h>
>>> +
>>> +/**
>>> + * SMMUv3 Stream Table Entry stage 1 related information
>>> + * @abort: shall the STE lead to abort
>>> + * @s1fmt: STE s1fmt field as set by the guest
>>> + * @s1dss: STE s1dss as set by the guest
>>> + * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)
>>
>> Not really the case for @abort. Could you clarify whether @abort is
>> valid in combination with @bypass?
>> abort corresponds to !Config[2]. In that case the spec says "report
> abort to device, no event recorded". S1 bypass corresponds to
> Config=0b1x0. What about removing abort in the SMMUv3 specific part and
> encode the stage state in the generic part. See below proposal ...
>>> + */
>>> +struct iommu_pasid_smmuv3 {
>>> +	__u8 abort;
>>> +	__u8 s1fmt;
>>> +	__u8 s1dss;
>>> +};
>>> +
>>> +/**
>>> + * PASID table data used to bind guest PASID table to the host IOMMU
>>> + * Note PASID table corresponds to the Context Table on ARM SMMUv3.
>>> + *
>>> + * @version: API version to prepare for future extensions
>>> + * @format: format of the PASID table
>>> + *
>>> + */
>>> +struct iommu_pasid_table_config {
>>> +#define PASID_TABLE_CFG_VERSION_1 1
>>> +	__u32	version;
>>> +#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
>>> +	__u32	format;
>>> +	__u64	base_ptr;
>>> +	__u8	pasid_bits;
>>> +	__u8	bypass
> #define IOMMU_PASID_STREAM_ABORT  (1 << 0)
> #define IOMMU_PASID_STREAM_BYPASS (1 << 1)
> #define IOMMU_PASID_STREAM_TRANSLATE (1 << 2)
> __u8 config;
Sorry for the confusion, we don't want a bitfield here as those values
are exclusive.

What about:
struct iommu_pasid_table_config {
#define PASID_TABLE_CFG_VERSION_1 1
        __u32   version;
#define IOMMU_PASID_FORMAT_SMMUV3       (1 << 0)
        __u32   format;
        __u64   base_ptr;
        __u8    pasid_bits;
#define IOMMU_PASID_CONFIG_BYPASS       1
#define IOMMU_PASID_CONFIG_ABORT        2
#define IOMMU_PASID_CONFIG_TRANSLATE    3
        __u8    config;
        __u8    padding[6];
        union {
                struct iommu_pasid_smmuv3 smmuv3;
        };
};


Thanks

Eric
>>
>> We need some padding, in case someone adds a new struct to the union
>> that requires 64-byte alignment
> OK
>>
>> And 'bypass' might not be the right name if we're making it common,
>> maybe 'reset' would be clearer? Or we just need to explain that bypass
>> is the initial state of a nesting domain
> I will add such comment. To me the "bypass" terminology sounds clearer
> than "reset"
>>
>> Thanks,
>> Jean
>>
>>> +	union {
>>> +		struct iommu_pasid_smmuv3 smmuv3;
>>> +	};
>>> +};
>>> +
>>> +#endif /* _UAPI_IOMMU_H */
>>>
>>
Auger Eric - Jan. 25, 2019, 9:20 a.m.
Hi Alex,

On 1/11/19 7:43 PM, Alex Williamson wrote:
> On Tue,  8 Jan 2019 11:26:13 +0100
> Eric Auger <eric.auger@redhat.com> wrote:
> 
>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>
>> In virtualization use case, when a guest is assigned
>> a PCI host device, protected by a virtual IOMMU on a guest,
>> the physical IOMMU must be programmed to be consistent with
>> the guest mappings. If the physical IOMMU supports two
>> translation stages it makes sense to program guest mappings
>> onto the first stage/level (ARM/VTD terminology) while to host
>> owns the stage/level 2.
>>
>> In that case, it is mandated to trap on guest configuration
>> settings and pass those to the physical iommu driver.
>>
>> This patch adds a new API to the iommu subsystem that allows
>> to set the pasid table information.
>>
>> A generic iommu_pasid_table_config struct is introduced in
>> a new iommu.h uapi header. This is going to be used by the VFIO
>> user API. We foresee at least two specializations of this struct,
>> for PASID table passing and ARM SMMUv3.
>>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> This patch generalizes the API introduced by Jacob & co-authors in
>> https://lwn.net/Articles/754331/
>>
>> v2 -> v3:
>> - replace unbind/bind by set_pasid_table
>> - move table pointer and pasid bits in the generic part of the struct
>>
>> v1 -> v2:
>> - restore the original pasid table name
>> - remove the struct device * parameter in the API
>> - reworked iommu_pasid_smmuv3
>> ---
>>  drivers/iommu/iommu.c      | 10 ++++++++
>>  include/linux/iommu.h      | 14 +++++++++++
>>  include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 74 insertions(+)
>>  create mode 100644 include/uapi/linux/iommu.h
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 3ed4db334341..0f2b7f1fc7c8 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>>  }
>>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>>  
>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>> +			  struct iommu_pasid_table_config *cfg)
>> +{
>> +	if (unlikely(!domain->ops->set_pasid_table))
>> +		return -ENODEV;
>> +
>> +	return domain->ops->set_pasid_table(domain, cfg);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
>> +
>>  static void __iommu_detach_device(struct iommu_domain *domain,
>>  				  struct device *dev)
>>  {
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index e90da6b6f3d1..1da2a2357ea4 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -25,6 +25,7 @@
>>  #include <linux/errno.h>
>>  #include <linux/err.h>
>>  #include <linux/of.h>
>> +#include <uapi/linux/iommu.h>
>>  
>>  #define IOMMU_READ	(1 << 0)
>>  #define IOMMU_WRITE	(1 << 1)
>> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>>   * @domain_window_disable: Disable a particular window for a domain
>>   * @of_xlate: add OF master IDs to iommu grouping
>>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>> + * @set_pasid_table: set pasid table
>>   */
>>  struct iommu_ops {
>>  	bool (*capable)(enum iommu_cap);
>> @@ -226,6 +228,9 @@ struct iommu_ops {
>>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>>  
>> +	int (*set_pasid_table)(struct iommu_domain *domain,
>> +			       struct iommu_pasid_table_config *cfg);
>> +
>>  	unsigned long pgsize_bitmap;
>>  };
>>  
>> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>>  			       struct device *dev);
>>  extern void iommu_detach_device(struct iommu_domain *domain,
>>  				struct device *dev);
>> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
>> +				 struct iommu_pasid_table_config *cfg);
>>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
>> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>>  	return NULL;
>>  }
>>  
>> +static inline
>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>> +			  struct iommu_pasid_table_config *cfg)
>> +{
>> +	return -ENODEV;
>> +}
>> +
>>  #endif /* CONFIG_IOMMU_API */
>>  
>>  #ifdef CONFIG_IOMMU_DEBUGFS
>> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
>> new file mode 100644
>> index 000000000000..7a7cf7a3de7c
>> --- /dev/null
>> +++ b/include/uapi/linux/iommu.h
>> @@ -0,0 +1,50 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +/*
>> + * IOMMU user API definitions
>> + *
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#ifndef _UAPI_IOMMU_H
>> +#define _UAPI_IOMMU_H
>> +
>> +#include <linux/types.h>
>> +
>> +/**
>> + * SMMUv3 Stream Table Entry stage 1 related information
>> + * @abort: shall the STE lead to abort
>> + * @s1fmt: STE s1fmt field as set by the guest
>> + * @s1dss: STE s1dss as set by the guest
>> + * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)
>> + */
>> +struct iommu_pasid_smmuv3 {
>> +	__u8 abort;
>> +	__u8 s1fmt;
>> +	__u8 s1dss;
>> +};
>> +
> 
> I can find STE.S1DSS and STE.S1FMT in the spec, but not STE.ABORT, is
> this something to do with Config[2:0]?  Are we allowed to describe what
> these fields are beyond their name and why they're necessary here vs
> the other fields or do the spec restrictions preclude that?
Yes you're right abort matches !Config[2]

what about:

/**
 * SMMUv3 Stream Table Entry stage 1 related information
 * The PASID table is referred to as the context descriptor (CD) table.
 *
 * @s1fmt: STE s1fmt (format of the CD table: single CD, linear table
   or 2-level table)
 * @s1dss: STE s1dss (specifies the behavior when pasid_bits != 0
   an no pasid is passed along with the incoming transaction)
 * Please refer to the smmu 3.x spec (ARM IHI 0070A) for full details
 */
struct iommu_pasid_smmuv3 {
#define PASID_TABLE_SMMUV3_CFG_VERSION_1 1
        __u32   version;
        __u8 s1fmt;
        __u8 s1dss;
        __u8 padding[2];
};


> 
>> +/**
>> + * PASID table data used to bind guest PASID table to the host IOMMU
>> + * Note PASID table corresponds to the Context Table on ARM SMMUv3.
>> + *
>> + * @version: API version to prepare for future extensions
>> + * @format: format of the PASID table
>> + *
>> + */
>> +struct iommu_pasid_table_config {
>> +#define PASID_TABLE_CFG_VERSION_1 1
>> +	__u32	version;
>> +#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
>> +	__u32	format;
>> +	__u64	base_ptr;
>> +	__u8	pasid_bits;
>> +	__u8	bypass;
>> +	union {
>> +		struct iommu_pasid_smmuv3 smmuv3;
>> +	};
>> +};
> 
> Structure is not naturally aligned or explicitly aligned for
> interchange with userspace.  It might work for smmuv3 since the
> structure is only composed of bytes, but looks troublesome in general.
> Should each format type also contain a version?  Is format intended to
> be a bit-field or a signature?  It seems we only need a signature, but
> only having a single format defined, it looks like a bit-field, which
> makes me worry what we do when we exhaust the bits.

I think a signature is what we need.

  The bypass field
> should be better defined, is it 0/1?  zero/non-zero?  more selective?

I suggest to replace by a signature config field (bypass, abort, translate)

Thanks

Eric
> Thanks,
> 
> Alex
> 
>> +
>> +#endif /* _UAPI_IOMMU_H */
>
Jean-Philippe Brucker - Jan. 25, 2019, 10:33 a.m.
On 25/01/2019 08:55, Auger Eric wrote:
> Hi Jean-Philippe,
> 
> On 1/25/19 9:39 AM, Auger Eric wrote:
>> Hi Jean-Philippe,
>>
>> On 1/11/19 7:16 PM, Jean-Philippe Brucker wrote:
>>> On 08/01/2019 10:26, Eric Auger wrote:
>>>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>>
>>>> In virtualization use case, when a guest is assigned
>>>> a PCI host device, protected by a virtual IOMMU on a guest,
>>>> the physical IOMMU must be programmed to be consistent with
>>>> the guest mappings. If the physical IOMMU supports two
>>>> translation stages it makes sense to program guest mappings
>>>> onto the first stage/level (ARM/VTD terminology) while to host
>>>> owns the stage/level 2.
>>>>
>>>> In that case, it is mandated to trap on guest configuration
>>>> settings and pass those to the physical iommu driver.
>>>>
>>>> This patch adds a new API to the iommu subsystem that allows
>>>> to set the pasid table information.
>>>>
>>>> A generic iommu_pasid_table_config struct is introduced in
>>>> a new iommu.h uapi header. This is going to be used by the VFIO
>>>> user API. We foresee at least two specializations of this struct,
>>>> for PASID table passing and ARM SMMUv3.
>>>
>>> Last sentence is a bit confusing. With SMMUv3 it is also used for the
>>> PASID table, even when it only has one entry and PASID is disabled.
>> OK removed
>>>
>>>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>>>> Signed-off-by: Liu, Yi L <yi.l.liu@linux.intel.com>
>>>> Signed-off-by: Ashok Raj <ashok.raj@intel.com>
>>>> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> ---
>>>>
>>>> This patch generalizes the API introduced by Jacob & co-authors in
>>>> https://lwn.net/Articles/754331/
>>>>
>>>> v2 -> v3:
>>>> - replace unbind/bind by set_pasid_table
>>>> - move table pointer and pasid bits in the generic part of the struct
>>>>
>>>> v1 -> v2:
>>>> - restore the original pasid table name
>>>> - remove the struct device * parameter in the API
>>>> - reworked iommu_pasid_smmuv3
>>>> ---
>>>>  drivers/iommu/iommu.c      | 10 ++++++++
>>>>  include/linux/iommu.h      | 14 +++++++++++
>>>>  include/uapi/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 74 insertions(+)
>>>>  create mode 100644 include/uapi/linux/iommu.h
>>>>
>>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>>> index 3ed4db334341..0f2b7f1fc7c8 100644
>>>> --- a/drivers/iommu/iommu.c
>>>> +++ b/drivers/iommu/iommu.c
>>>> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>>>>  
>>>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>>>> +			  struct iommu_pasid_table_config *cfg)
>>>> +{
>>>> +	if (unlikely(!domain->ops->set_pasid_table))
>>>> +		return -ENODEV;
>>>> +
>>>> +	return domain->ops->set_pasid_table(domain, cfg);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
>>>> +
>>>>  static void __iommu_detach_device(struct iommu_domain *domain,
>>>>  				  struct device *dev)
>>>>  {
>>>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>>>> index e90da6b6f3d1..1da2a2357ea4 100644
>>>> --- a/include/linux/iommu.h
>>>> +++ b/include/linux/iommu.h
>>>> @@ -25,6 +25,7 @@
>>>>  #include <linux/errno.h>
>>>>  #include <linux/err.h>
>>>>  #include <linux/of.h>
>>>> +#include <uapi/linux/iommu.h>
>>>>  
>>>>  #define IOMMU_READ	(1 << 0)
>>>>  #define IOMMU_WRITE	(1 << 1)
>>>> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>>>>   * @domain_window_disable: Disable a particular window for a domain
>>>>   * @of_xlate: add OF master IDs to iommu grouping
>>>>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>>>> + * @set_pasid_table: set pasid table
>>>>   */
>>>>  struct iommu_ops {
>>>>  	bool (*capable)(enum iommu_cap);
>>>> @@ -226,6 +228,9 @@ struct iommu_ops {
>>>>  	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>>>>  	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>>>>  
>>>> +	int (*set_pasid_table)(struct iommu_domain *domain,
>>>> +			       struct iommu_pasid_table_config *cfg);
>>>> +
>>>>  	unsigned long pgsize_bitmap;
>>>>  };
>>>>  
>>>> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>>>>  			       struct device *dev);
>>>>  extern void iommu_detach_device(struct iommu_domain *domain,
>>>>  				struct device *dev);
>>>> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
>>>> +				 struct iommu_pasid_table_config *cfg);
>>>>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>>>>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>>>>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
>>>> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>>>>  	return NULL;
>>>>  }
>>>>  
>>>> +static inline
>>>> +int iommu_set_pasid_table(struct iommu_domain *domain,
>>>> +			  struct iommu_pasid_table_config *cfg)
>>>> +{
>>>> +	return -ENODEV;
>>>> +}
>>>> +
>>>>  #endif /* CONFIG_IOMMU_API */
>>>>  
>>>>  #ifdef CONFIG_IOMMU_DEBUGFS
>>>> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
>>>> new file mode 100644
>>>> index 000000000000..7a7cf7a3de7c
>>>> --- /dev/null
>>>> +++ b/include/uapi/linux/iommu.h
>>>> @@ -0,0 +1,50 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>>>> +/*
>>>> + * IOMMU user API definitions
>>>> + *
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or modify
>>>> + * it under the terms of the GNU General Public License version 2 as
>>>> + * published by the Free Software Foundation.
>>>
>>> I don't think we need both the boilerplate and the SPDX header
>> OK I only kept the SPDX header.
>>>
>>>> + */
>>>> +
>>>> +#ifndef _UAPI_IOMMU_H
>>>> +#define _UAPI_IOMMU_H
>>>> +
>>>> +#include <linux/types.h>
>>>> +
>>>> +/**
>>>> + * SMMUv3 Stream Table Entry stage 1 related information
>>>> + * @abort: shall the STE lead to abort
>>>> + * @s1fmt: STE s1fmt field as set by the guest
>>>> + * @s1dss: STE s1dss as set by the guest
>>>> + * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)
>>>
>>> Not really the case for @abort. Could you clarify whether @abort is
>>> valid in combination with @bypass?
>>> abort corresponds to !Config[2]. In that case the spec says "report
>> abort to device, no event recorded". S1 bypass corresponds to
>> Config=0b1x0. What about removing abort in the SMMUv3 specific part and
>> encode the stage state in the generic part. See below proposal ...
>>>> + */
>>>> +struct iommu_pasid_smmuv3 {
>>>> +	__u8 abort;
>>>> +	__u8 s1fmt;
>>>> +	__u8 s1dss;
>>>> +};
>>>> +
>>>> +/**
>>>> + * PASID table data used to bind guest PASID table to the host IOMMU
>>>> + * Note PASID table corresponds to the Context Table on ARM SMMUv3.
>>>> + *
>>>> + * @version: API version to prepare for future extensions
>>>> + * @format: format of the PASID table
>>>> + *
>>>> + */
>>>> +struct iommu_pasid_table_config {
>>>> +#define PASID_TABLE_CFG_VERSION_1 1
>>>> +	__u32	version;
>>>> +#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
>>>> +	__u32	format;
>>>> +	__u64	base_ptr;
>>>> +	__u8	pasid_bits;
>>>> +	__u8	bypass
>> #define IOMMU_PASID_STREAM_ABORT  (1 << 0)
>> #define IOMMU_PASID_STREAM_BYPASS (1 << 1)
>> #define IOMMU_PASID_STREAM_TRANSLATE (1 << 2)
>> __u8 config;
> Sorry for the confusion, we don't want a bitfield here as those values
> are exclusive.
> 
> What about:
> struct iommu_pasid_table_config {
> #define PASID_TABLE_CFG_VERSION_1 1
>         __u32   version;
> #define IOMMU_PASID_FORMAT_SMMUV3       (1 << 0)
>         __u32   format;
>         __u64   base_ptr;
>         __u8    pasid_bits;
> #define IOMMU_PASID_CONFIG_BYPASS       1
> #define IOMMU_PASID_CONFIG_ABORT        2
> #define IOMMU_PASID_CONFIG_TRANSLATE    3

Yes, this makes more sense :)

>         __u8    config;
>         __u8    padding[6];
>         union {
>                 struct iommu_pasid_smmuv3 smmuv3;
>         };
> };

Thanks,
Jean

> 
> Thanks
> 
> Eric
>>>
>>> We need some padding, in case someone adds a new struct to the union
>>> that requires 64-byte alignment
>> OK
>>>
>>> And 'bypass' might not be the right name if we're making it common,
>>> maybe 'reset' would be clearer? Or we just need to explain that bypass
>>> is the initial state of a nesting domain
>> I will add such comment. To me the "bypass" terminology sounds clearer
>> than "reset"
>>>
>>> Thanks,
>>> Jean
>>>
>>>> +	union {
>>>> +		struct iommu_pasid_smmuv3 smmuv3;
>>>> +	};
>>>> +};
>>>> +
>>>> +#endif /* _UAPI_IOMMU_H */
>>>>
>>>

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3ed4db334341..0f2b7f1fc7c8 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1393,6 +1393,16 @@  int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_attach_device);
 
+int iommu_set_pasid_table(struct iommu_domain *domain,
+			  struct iommu_pasid_table_config *cfg)
+{
+	if (unlikely(!domain->ops->set_pasid_table))
+		return -ENODEV;
+
+	return domain->ops->set_pasid_table(domain, cfg);
+}
+EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
+
 static void __iommu_detach_device(struct iommu_domain *domain,
 				  struct device *dev)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e90da6b6f3d1..1da2a2357ea4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -25,6 +25,7 @@ 
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <uapi/linux/iommu.h>
 
 #define IOMMU_READ	(1 << 0)
 #define IOMMU_WRITE	(1 << 1)
@@ -184,6 +185,7 @@  struct iommu_resv_region {
  * @domain_window_disable: Disable a particular window for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of all possible supported page sizes
+ * @set_pasid_table: set pasid table
  */
 struct iommu_ops {
 	bool (*capable)(enum iommu_cap);
@@ -226,6 +228,9 @@  struct iommu_ops {
 	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
 	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
 
+	int (*set_pasid_table)(struct iommu_domain *domain,
+			       struct iommu_pasid_table_config *cfg);
+
 	unsigned long pgsize_bitmap;
 };
 
@@ -287,6 +292,8 @@  extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
+extern int iommu_set_pasid_table(struct iommu_domain *domain,
+				 struct iommu_pasid_table_config *cfg);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
@@ -696,6 +703,13 @@  const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 	return NULL;
 }
 
+static inline
+int iommu_set_pasid_table(struct iommu_domain *domain,
+			  struct iommu_pasid_table_config *cfg)
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
new file mode 100644
index 000000000000..7a7cf7a3de7c
--- /dev/null
+++ b/include/uapi/linux/iommu.h
@@ -0,0 +1,50 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOMMU user API definitions
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _UAPI_IOMMU_H
+#define _UAPI_IOMMU_H
+
+#include <linux/types.h>
+
+/**
+ * SMMUv3 Stream Table Entry stage 1 related information
+ * @abort: shall the STE lead to abort
+ * @s1fmt: STE s1fmt field as set by the guest
+ * @s1dss: STE s1dss as set by the guest
+ * All field names match the smmu 3.0/3.1 spec (ARM IHI 0070A)
+ */
+struct iommu_pasid_smmuv3 {
+	__u8 abort;
+	__u8 s1fmt;
+	__u8 s1dss;
+};
+
+/**
+ * PASID table data used to bind guest PASID table to the host IOMMU
+ * Note PASID table corresponds to the Context Table on ARM SMMUv3.
+ *
+ * @version: API version to prepare for future extensions
+ * @format: format of the PASID table
+ *
+ */
+struct iommu_pasid_table_config {
+#define PASID_TABLE_CFG_VERSION_1 1
+	__u32	version;
+#define IOMMU_PASID_FORMAT_SMMUV3	(1 << 0)
+	__u32	format;
+	__u64	base_ptr;
+	__u8	pasid_bits;
+	__u8	bypass;
+	union {
+		struct iommu_pasid_smmuv3 smmuv3;
+	};
+};
+
+#endif /* _UAPI_IOMMU_H */