Patchwork [4/4] iommu/vt-d: Remove lazy allocation of domains

login
register
mail settings
Submitter Benjamin Serebrin via iommu
Date March 4, 2019, 3:47 p.m.
Message ID <7F6B5F6A-EC76-4A9F-8EB6-AEAB9994D91A@arista.com>
Download mbox | patch
Permalink /patch/740771/
State New
Headers show

Comments

Benjamin Serebrin via iommu - March 4, 2019, 3:47 p.m.
The generic IOMMU code will allocate and attach a dma ops domain to each
device that comes online, replacing any lazy allocated domain. Removes
RMRR application at iommu init time as we won't have a domain attached
to any device. iommu.c will do this after attaching a device using
create_direct_mappings.

Signed-off-by: James Sewart <jamessewart@arista.com>
---
 drivers/iommu/intel-iommu.c | 202 ++----------------------------------
 1 file changed, 8 insertions(+), 194 deletions(-)
Lu Baolu - March 5, 2019, 6:59 a.m.
Hi,

It's hard for me to understand why do we remove the rmrr related
code in this patch.

And, now we have two places to hold a domain for a device: group and
dev->info. We can consider to optimize the use of per device iommu
arch data. This should be later work anyway.

More comments inline.

On 3/4/19 11:47 PM, James Sewart wrote:
> The generic IOMMU code will allocate and attach a dma ops domain to each
> device that comes online, replacing any lazy allocated domain. Removes
> RMRR application at iommu init time as we won't have a domain attached
> to any device. iommu.c will do this after attaching a device using
> create_direct_mappings.
> 
> Signed-off-by: James Sewart <jamessewart@arista.com>
> ---
>   drivers/iommu/intel-iommu.c | 202 ++----------------------------------
>   1 file changed, 8 insertions(+), 194 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 71cd6bbfec05..282257e2628d 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -2595,118 +2595,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
>   	return domain;
>   }
>   
> -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
> -{
> -	*(u16 *)opaque = alias;
> -	return 0;
> -}
> -
> -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
> -{
> -	struct device_domain_info *info = NULL;
> -	struct dmar_domain *domain = NULL;
> -	struct intel_iommu *iommu;
> -	u16 dma_alias;
> -	unsigned long flags;
> -	u8 bus, devfn;
> -
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> -	if (!iommu)
> -		return NULL;
> -
> -	if (dev_is_pci(dev)) {
> -		struct pci_dev *pdev = to_pci_dev(dev);
> -
> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
> -
> -		spin_lock_irqsave(&device_domain_lock, flags);
> -		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
> -						      PCI_BUS_NUM(dma_alias),
> -						      dma_alias & 0xff);
> -		if (info) {
> -			iommu = info->iommu;
> -			domain = info->domain;
> -		}
> -		spin_unlock_irqrestore(&device_domain_lock, flags);
> -
> -		/* DMA alias already has a domain, use it */
> -		if (info)
> -			goto out;
> -	}
> -
> -	/* Allocate and initialize new domain for the device */
> -	domain = alloc_domain(0);
> -	if (!domain)
> -		return NULL;
> -	if (domain_init(domain, iommu, gaw)) {
> -		domain_exit(domain);
> -		return NULL;
> -	}
> -
> -out:
> -
> -	return domain;
> -}
> -
> -static struct dmar_domain *set_domain_for_dev(struct device *dev,
> -					      struct dmar_domain *domain)
> -{
> -	struct intel_iommu *iommu;
> -	struct dmar_domain *tmp;
> -	u16 req_id, dma_alias;
> -	u8 bus, devfn;
> -
> -	iommu = device_to_iommu(dev, &bus, &devfn);
> -	if (!iommu)
> -		return NULL;
> -
> -	req_id = ((u16)bus << 8) | devfn;
> -
> -	if (dev_is_pci(dev)) {
> -		struct pci_dev *pdev = to_pci_dev(dev);
> -
> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
> -
> -		/* register PCI DMA alias device */
> -		if (req_id != dma_alias) {
> -			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
> -					dma_alias & 0xff, NULL, domain);
> -
> -			if (!tmp || tmp != domain)
> -				return tmp;
> -		}
> -	}
> -
> -	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
> -	if (!tmp || tmp != domain)
> -		return tmp;
> -
> -	return domain;
> -}
> -
> -static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
> -{
> -	struct dmar_domain *domain, *tmp;
> -
> -	domain = find_domain(dev);
> -	if (domain)
> -		goto out;
> -
> -	domain = find_or_alloc_domain(dev, gaw);
> -	if (!domain)
> -		goto out;
> -
> -	tmp = set_domain_for_dev(dev, domain);
> -	if (!tmp || domain != tmp) {
> -		domain_exit(domain);
> -		domain = tmp;
> -	}
> -
> -out:
> -
> -	return domain;
> -}
> -
>   static int iommu_domain_identity_map(struct dmar_domain *domain,
>   				     unsigned long long start,
>   				     unsigned long long end)
> @@ -2779,7 +2667,7 @@ static int iommu_prepare_identity_map(struct device *dev,
>   	struct dmar_domain *domain;
>   	int ret;
>   
> -	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
> +	domain = find_domain(dev);
>   	if (!domain)
>   		return -ENOMEM;
>   
> @@ -3301,11 +3189,9 @@ static int copy_translation_tables(struct intel_iommu *iommu)
>   static int __init init_dmars(void)
>   {
>   	struct dmar_drhd_unit *drhd;
> -	struct dmar_rmrr_unit *rmrr;
>   	bool copied_tables = false;
> -	struct device *dev;
>   	struct intel_iommu *iommu;
> -	int i, ret;
> +	int ret;
>   
>   	/*
>   	 * for each drhd
> @@ -3466,32 +3352,6 @@ static int __init init_dmars(void)
>   			goto free_iommu;
>   		}
>   	}
> -	/*
> -	 * For each rmrr
> -	 *   for each dev attached to rmrr
> -	 *   do
> -	 *     locate drhd for dev, alloc domain for dev
> -	 *     allocate free domain
> -	 *     allocate page table entries for rmrr
> -	 *     if context not allocated for bus
> -	 *           allocate and init context
> -	 *           set present in root table for this bus
> -	 *     init context with domain, translation etc
> -	 *    endfor
> -	 * endfor
> -	 */
> -	pr_info("Setting RMRR:\n");
> -	for_each_rmrr_units(rmrr) {
> -		/* some BIOS lists non-exist devices in DMAR table. */
> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
> -					  i, dev) {
> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
> -			if (ret)
> -				pr_err("Mapping reserved region failed\n");
> -		}
> -	}
> -
> -	iommu_prepare_isa();

Why do you want to remove this segment of code?

>   
>   domains_done:
>   
> @@ -3580,53 +3440,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
>   	return iova_pfn;
>   }
>   
> -struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
> -{
> -	struct dmar_domain *domain, *tmp;
> -	struct dmar_rmrr_unit *rmrr;
> -	struct device *i_dev;
> -	int i, ret;
> -
> -	domain = find_domain(dev);
> -	if (domain)
> -		goto out;
> -
> -	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
> -	if (!domain)
> -		goto out;
> -
> -	/* We have a new domain - setup possible RMRRs for the device */
> -	rcu_read_lock();
> -	for_each_rmrr_units(rmrr) {
> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
> -					  i, i_dev) {
> -			if (i_dev != dev)
> -				continue;
> -
> -			ret = domain_prepare_identity_map(dev, domain,
> -							  rmrr->base_address,
> -							  rmrr->end_address);
> -			if (ret)
> -				dev_err(dev, "Mapping reserved region failed\n");

We can't simply remove this segment of code, right? At least it should
be moved to the domain allocation interface.

> -		}
> -	}
> -	rcu_read_unlock();
> -
> -	tmp = set_domain_for_dev(dev, domain);
> -	if (!tmp || domain != tmp) {
> -		domain_exit(domain);
> -		domain = tmp;
> -	}
> -
> -out:
> -
> -	if (!domain)
> -		pr_err("Allocating domain for %s failed\n", dev_name(dev));
> -
> -
> -	return domain;
> -}
> -
>   /* Check if the dev needs to go through non-identity map and unmap process.*/
>   static int iommu_no_mapping(struct device *dev)
>   {
> @@ -3689,7 +3502,7 @@ static dma_addr_t __intel_map_page(struct device *dev, struct page *page,
>   	if (iommu_no_mapping(dev))
>   		return paddr;
>   
> -	domain = get_valid_domain_for_dev(dev);
> +	domain = find_domain(dev);
>   	if (!domain)
>   		return DMA_MAPPING_ERROR;
>   
> @@ -3753,7 +3566,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
>   		return;
>   
>   	domain = find_domain(dev);
> -	BUG_ON(!domain);
> +	if (!domain)
> +		return;
>   

This is not related to this patch. Let's do it in a separated patch.

>   	iommu = domain_get_iommu(domain);
>   
> @@ -3899,7 +3713,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
>   	if (iommu_no_mapping(dev))
>   		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
>   
> -	domain = get_valid_domain_for_dev(dev);
> +	domain = find_domain(dev);
>   	if (!domain)
>   		return 0;
>   
> @@ -5377,9 +5191,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
>   	u64 ctx_lo;
>   	int ret;
>   
> -	domain = get_valid_domain_for_dev(sdev->dev);
> +	domain = find_domain(sdev->dev);
>   	if (!domain)
> -		return -EINVAL;
> +		return -ENOMEM;

This is not related to this patch. Let's do it in a separated patch.

>   
>   	spin_lock_irqsave(&device_domain_lock, flags);
>   	spin_lock(&iommu->lock);
> 

Best regards,
Lu Baolu
Benjamin Serebrin via iommu - March 5, 2019, 11:46 a.m.
Hey Lu,

> On 5 Mar 2019, at 06:59, Lu Baolu <baolu.lu@linux.intel.com> wrote:
> 
> Hi,
> 
> It's hard for me to understand why do we remove the rmrr related
> code in this patch.

The RMRR code removed here requires the lazy allocation of domains to 
exist, as it is run before iommu.c would assign IOMMU groups and attach a 
domain. Before patch 3, removing this code would mean the RMRR regions are 
never mapped for a domain: iommu.c will allocate a default domain for the 
group that a device is about to be put in, it will attach the domain to 
the device, then for each region returned by get_resv_regions it will 
create an identity map, this is where the RMRRs are setup for the default 
domain.

> 
> And, now we have two places to hold a domain for a device: group and
> dev->info. We can consider to optimize the use of per device iommu
> arch data. This should be later work anyway.
> 
> More comments inline.
> 
> On 3/4/19 11:47 PM, James Sewart wrote:
>> The generic IOMMU code will allocate and attach a dma ops domain to each
>> device that comes online, replacing any lazy allocated domain. Removes
>> RMRR application at iommu init time as we won't have a domain attached
>> to any device. iommu.c will do this after attaching a device using
>> create_direct_mappings.
>> Signed-off-by: James Sewart <jamessewart@arista.com>
>> ---
>>  drivers/iommu/intel-iommu.c | 202 ++----------------------------------
>>  1 file changed, 8 insertions(+), 194 deletions(-)
>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>> index 71cd6bbfec05..282257e2628d 100644
>> --- a/drivers/iommu/intel-iommu.c
>> +++ b/drivers/iommu/intel-iommu.c
>> @@ -2595,118 +2595,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
>>  	return domain;
>>  }
>>  -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
>> -{
>> -	*(u16 *)opaque = alias;
>> -	return 0;
>> -}
>> -
>> -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
>> -{
>> -	struct device_domain_info *info = NULL;
>> -	struct dmar_domain *domain = NULL;
>> -	struct intel_iommu *iommu;
>> -	u16 dma_alias;
>> -	unsigned long flags;
>> -	u8 bus, devfn;
>> -
>> -	iommu = device_to_iommu(dev, &bus, &devfn);
>> -	if (!iommu)
>> -		return NULL;
>> -
>> -	if (dev_is_pci(dev)) {
>> -		struct pci_dev *pdev = to_pci_dev(dev);
>> -
>> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
>> -
>> -		spin_lock_irqsave(&device_domain_lock, flags);
>> -		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
>> -						      PCI_BUS_NUM(dma_alias),
>> -						      dma_alias & 0xff);
>> -		if (info) {
>> -			iommu = info->iommu;
>> -			domain = info->domain;
>> -		}
>> -		spin_unlock_irqrestore(&device_domain_lock, flags);
>> -
>> -		/* DMA alias already has a domain, use it */
>> -		if (info)
>> -			goto out;
>> -	}
>> -
>> -	/* Allocate and initialize new domain for the device */
>> -	domain = alloc_domain(0);
>> -	if (!domain)
>> -		return NULL;
>> -	if (domain_init(domain, iommu, gaw)) {
>> -		domain_exit(domain);
>> -		return NULL;
>> -	}
>> -
>> -out:
>> -
>> -	return domain;
>> -}
>> -
>> -static struct dmar_domain *set_domain_for_dev(struct device *dev,
>> -					      struct dmar_domain *domain)
>> -{
>> -	struct intel_iommu *iommu;
>> -	struct dmar_domain *tmp;
>> -	u16 req_id, dma_alias;
>> -	u8 bus, devfn;
>> -
>> -	iommu = device_to_iommu(dev, &bus, &devfn);
>> -	if (!iommu)
>> -		return NULL;
>> -
>> -	req_id = ((u16)bus << 8) | devfn;
>> -
>> -	if (dev_is_pci(dev)) {
>> -		struct pci_dev *pdev = to_pci_dev(dev);
>> -
>> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
>> -
>> -		/* register PCI DMA alias device */
>> -		if (req_id != dma_alias) {
>> -			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
>> -					dma_alias & 0xff, NULL, domain);
>> -
>> -			if (!tmp || tmp != domain)
>> -				return tmp;
>> -		}
>> -	}
>> -
>> -	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
>> -	if (!tmp || tmp != domain)
>> -		return tmp;
>> -
>> -	return domain;
>> -}
>> -
>> -static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
>> -{
>> -	struct dmar_domain *domain, *tmp;
>> -
>> -	domain = find_domain(dev);
>> -	if (domain)
>> -		goto out;
>> -
>> -	domain = find_or_alloc_domain(dev, gaw);
>> -	if (!domain)
>> -		goto out;
>> -
>> -	tmp = set_domain_for_dev(dev, domain);
>> -	if (!tmp || domain != tmp) {
>> -		domain_exit(domain);
>> -		domain = tmp;
>> -	}
>> -
>> -out:
>> -
>> -	return domain;
>> -}
>> -
>>  static int iommu_domain_identity_map(struct dmar_domain *domain,
>>  				     unsigned long long start,
>>  				     unsigned long long end)
>> @@ -2779,7 +2667,7 @@ static int iommu_prepare_identity_map(struct device *dev,
>>  	struct dmar_domain *domain;
>>  	int ret;
>>  -	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
>> +	domain = find_domain(dev);
>>  	if (!domain)
>>  		return -ENOMEM;
>>  @@ -3301,11 +3189,9 @@ static int copy_translation_tables(struct intel_iommu *iommu)
>>  static int __init init_dmars(void)
>>  {
>>  	struct dmar_drhd_unit *drhd;
>> -	struct dmar_rmrr_unit *rmrr;
>>  	bool copied_tables = false;
>> -	struct device *dev;
>>  	struct intel_iommu *iommu;
>> -	int i, ret;
>> +	int ret;
>>    	/*
>>  	 * for each drhd
>> @@ -3466,32 +3352,6 @@ static int __init init_dmars(void)
>>  			goto free_iommu;
>>  		}
>>  	}
>> -	/*
>> -	 * For each rmrr
>> -	 *   for each dev attached to rmrr
>> -	 *   do
>> -	 *     locate drhd for dev, alloc domain for dev
>> -	 *     allocate free domain
>> -	 *     allocate page table entries for rmrr
>> -	 *     if context not allocated for bus
>> -	 *           allocate and init context
>> -	 *           set present in root table for this bus
>> -	 *     init context with domain, translation etc
>> -	 *    endfor
>> -	 * endfor
>> -	 */
>> -	pr_info("Setting RMRR:\n");
>> -	for_each_rmrr_units(rmrr) {
>> -		/* some BIOS lists non-exist devices in DMAR table. */
>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>> -					  i, dev) {
>> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
>> -			if (ret)
>> -				pr_err("Mapping reserved region failed\n");
>> -		}
>> -	}
>> -
>> -	iommu_prepare_isa();
> 
> Why do you want to remove this segment of code?

This will only work if the lazy allocation of domains exists, these 
mappings will disappear once a default domain is attached to a device and 
then remade by iommu_group_create_direct_mappings. This code is redundant 
and removing it allows us to remove all the lazy allocation logic.

iommu_prepare_isa does need moving to get_resv_regions for its mappings to 
be applied, this will need some refactoring.

> 
>>    domains_done:
>>  @@ -3580,53 +3440,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
>>  	return iova_pfn;
>>  }
>>  -struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
>> -{
>> -	struct dmar_domain *domain, *tmp;
>> -	struct dmar_rmrr_unit *rmrr;
>> -	struct device *i_dev;
>> -	int i, ret;
>> -
>> -	domain = find_domain(dev);
>> -	if (domain)
>> -		goto out;
>> -
>> -	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
>> -	if (!domain)
>> -		goto out;
>> -
>> -	/* We have a new domain - setup possible RMRRs for the device */
>> -	rcu_read_lock();
>> -	for_each_rmrr_units(rmrr) {
>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>> -					  i, i_dev) {
>> -			if (i_dev != dev)
>> -				continue;
>> -
>> -			ret = domain_prepare_identity_map(dev, domain,
>> -							  rmrr->base_address,
>> -							  rmrr->end_address);
>> -			if (ret)
>> -				dev_err(dev, "Mapping reserved region failed\n");
> 
> We can't simply remove this segment of code, right? At least it should
> be moved to the domain allocation interface.

iommu_group_create_direct_mappings will take care of these mappings, this 
code is not used once an externally managed domain(group domain) is 
attached to the device.

> 
>> -		}
>> -	}
>> -	rcu_read_unlock();
>> -
>> -	tmp = set_domain_for_dev(dev, domain);
>> -	if (!tmp || domain != tmp) {
>> -		domain_exit(domain);
>> -		domain = tmp;
>> -	}
>> -
>> -out:
>> -
>> -	if (!domain)
>> -		pr_err("Allocating domain for %s failed\n", dev_name(dev));
>> -
>> -
>> -	return domain;
>> -}
>> -
>>  /* Check if the dev needs to go through non-identity map and unmap process.*/
>>  static int iommu_no_mapping(struct device *dev)
>>  {
>> @@ -3689,7 +3502,7 @@ static dma_addr_t __intel_map_page(struct device *dev, struct page *page,
>>  	if (iommu_no_mapping(dev))
>>  		return paddr;
>>  -	domain = get_valid_domain_for_dev(dev);
>> +	domain = find_domain(dev);
>>  	if (!domain)
>>  		return DMA_MAPPING_ERROR;
>>  @@ -3753,7 +3566,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
>>  		return;
>>    	domain = find_domain(dev);
>> -	BUG_ON(!domain);
>> +	if (!domain)
>> +		return;
>>  
> 
> This is not related to this patch. Let's do it in a separated patch.
> 
>>  	iommu = domain_get_iommu(domain);
>>  @@ -3899,7 +3713,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
>>  	if (iommu_no_mapping(dev))
>>  		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
>>  -	domain = get_valid_domain_for_dev(dev);
>> +	domain = find_domain(dev);
>>  	if (!domain)
>>  		return 0;
>>  @@ -5377,9 +5191,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
>>  	u64 ctx_lo;
>>  	int ret;
>>  -	domain = get_valid_domain_for_dev(sdev->dev);
>> +	domain = find_domain(sdev->dev);
>>  	if (!domain)
>> -		return -EINVAL;
>> +		return -ENOMEM;
> 
> This is not related to this patch. Let's do it in a separated patch.
> 
>>    	spin_lock_irqsave(&device_domain_lock, flags);
>>  	spin_lock(&iommu->lock);
> 
> Best regards,
> Lu Baolu
Lu Baolu - March 6, 2019, 7 a.m.
Hi,

On 3/5/19 7:46 PM, James Sewart wrote:
> Hey Lu,
> 
>> On 5 Mar 2019, at 06:59, Lu Baolu <baolu.lu@linux.intel.com> wrote:
>>
>> Hi,
>>
>> It's hard for me to understand why do we remove the rmrr related
>> code in this patch.
> 
> The RMRR code removed here requires the lazy allocation of domains to
> exist, as it is run before iommu.c would assign IOMMU groups and attach a
> domain. Before patch 3, removing this code would mean the RMRR regions are
> never mapped for a domain: iommu.c will allocate a default domain for the
> group that a device is about to be put in, it will attach the domain to
> the device, then for each region returned by get_resv_regions it will
> create an identity map, this is where the RMRRs are setup for the default
> domain. >
>>
>> And, now we have two places to hold a domain for a device: group and
>> dev->info. We can consider to optimize the use of per device iommu
>> arch data. This should be later work anyway.
>>
>> More comments inline.
>>
>> On 3/4/19 11:47 PM, James Sewart wrote:
>>> The generic IOMMU code will allocate and attach a dma ops domain to each
>>> device that comes online, replacing any lazy allocated domain. Removes
>>> RMRR application at iommu init time as we won't have a domain attached
>>> to any device. iommu.c will do this after attaching a device using
>>> create_direct_mappings.
>>> Signed-off-by: James Sewart <jamessewart@arista.com>
>>> ---
>>>   drivers/iommu/intel-iommu.c | 202 ++----------------------------------
>>>   1 file changed, 8 insertions(+), 194 deletions(-)
>>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>>> index 71cd6bbfec05..282257e2628d 100644
>>> --- a/drivers/iommu/intel-iommu.c
>>> +++ b/drivers/iommu/intel-iommu.c
>>> @@ -2595,118 +2595,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
>>>   	return domain;
>>>   }
>>>   -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
>>> -{
>>> -	*(u16 *)opaque = alias;
>>> -	return 0;
>>> -}
>>> -
>>> -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
>>> -{
>>> -	struct device_domain_info *info = NULL;
>>> -	struct dmar_domain *domain = NULL;
>>> -	struct intel_iommu *iommu;
>>> -	u16 dma_alias;
>>> -	unsigned long flags;
>>> -	u8 bus, devfn;
>>> -
>>> -	iommu = device_to_iommu(dev, &bus, &devfn);
>>> -	if (!iommu)
>>> -		return NULL;
>>> -
>>> -	if (dev_is_pci(dev)) {
>>> -		struct pci_dev *pdev = to_pci_dev(dev);
>>> -
>>> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
>>> -
>>> -		spin_lock_irqsave(&device_domain_lock, flags);
>>> -		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
>>> -						      PCI_BUS_NUM(dma_alias),
>>> -						      dma_alias & 0xff);
>>> -		if (info) {
>>> -			iommu = info->iommu;
>>> -			domain = info->domain;
>>> -		}
>>> -		spin_unlock_irqrestore(&device_domain_lock, flags);
>>> -
>>> -		/* DMA alias already has a domain, use it */
>>> -		if (info)
>>> -			goto out;
>>> -	}
>>> -
>>> -	/* Allocate and initialize new domain for the device */
>>> -	domain = alloc_domain(0);
>>> -	if (!domain)
>>> -		return NULL;
>>> -	if (domain_init(domain, iommu, gaw)) {
>>> -		domain_exit(domain);
>>> -		return NULL;
>>> -	}
>>> -
>>> -out:
>>> -
>>> -	return domain;
>>> -}
>>> -
>>> -static struct dmar_domain *set_domain_for_dev(struct device *dev,
>>> -					      struct dmar_domain *domain)
>>> -{
>>> -	struct intel_iommu *iommu;
>>> -	struct dmar_domain *tmp;
>>> -	u16 req_id, dma_alias;
>>> -	u8 bus, devfn;
>>> -
>>> -	iommu = device_to_iommu(dev, &bus, &devfn);
>>> -	if (!iommu)
>>> -		return NULL;
>>> -
>>> -	req_id = ((u16)bus << 8) | devfn;
>>> -
>>> -	if (dev_is_pci(dev)) {
>>> -		struct pci_dev *pdev = to_pci_dev(dev);
>>> -
>>> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
>>> -
>>> -		/* register PCI DMA alias device */
>>> -		if (req_id != dma_alias) {
>>> -			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
>>> -					dma_alias & 0xff, NULL, domain);
>>> -
>>> -			if (!tmp || tmp != domain)
>>> -				return tmp;
>>> -		}
>>> -	}
>>> -
>>> -	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
>>> -	if (!tmp || tmp != domain)
>>> -		return tmp;
>>> -
>>> -	return domain;
>>> -}
>>> -
>>> -static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
>>> -{
>>> -	struct dmar_domain *domain, *tmp;
>>> -
>>> -	domain = find_domain(dev);
>>> -	if (domain)
>>> -		goto out;
>>> -
>>> -	domain = find_or_alloc_domain(dev, gaw);
>>> -	if (!domain)
>>> -		goto out;
>>> -
>>> -	tmp = set_domain_for_dev(dev, domain);
>>> -	if (!tmp || domain != tmp) {
>>> -		domain_exit(domain);
>>> -		domain = tmp;
>>> -	}
>>> -
>>> -out:
>>> -
>>> -	return domain;
>>> -}
>>> -
>>>   static int iommu_domain_identity_map(struct dmar_domain *domain,
>>>   				     unsigned long long start,
>>>   				     unsigned long long end)
>>> @@ -2779,7 +2667,7 @@ static int iommu_prepare_identity_map(struct device *dev,
>>>   	struct dmar_domain *domain;
>>>   	int ret;
>>>   -	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
>>> +	domain = find_domain(dev);
>>>   	if (!domain)
>>>   		return -ENOMEM;
>>>   @@ -3301,11 +3189,9 @@ static int copy_translation_tables(struct intel_iommu *iommu)
>>>   static int __init init_dmars(void)
>>>   {
>>>   	struct dmar_drhd_unit *drhd;
>>> -	struct dmar_rmrr_unit *rmrr;
>>>   	bool copied_tables = false;
>>> -	struct device *dev;
>>>   	struct intel_iommu *iommu;
>>> -	int i, ret;
>>> +	int ret;
>>>     	/*
>>>   	 * for each drhd
>>> @@ -3466,32 +3352,6 @@ static int __init init_dmars(void)
>>>   			goto free_iommu;
>>>   		}
>>>   	}
>>> -	/*
>>> -	 * For each rmrr
>>> -	 *   for each dev attached to rmrr
>>> -	 *   do
>>> -	 *     locate drhd for dev, alloc domain for dev
>>> -	 *     allocate free domain
>>> -	 *     allocate page table entries for rmrr
>>> -	 *     if context not allocated for bus
>>> -	 *           allocate and init context
>>> -	 *           set present in root table for this bus
>>> -	 *     init context with domain, translation etc
>>> -	 *    endfor
>>> -	 * endfor
>>> -	 */
>>> -	pr_info("Setting RMRR:\n");
>>> -	for_each_rmrr_units(rmrr) {
>>> -		/* some BIOS lists non-exist devices in DMAR table. */
>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>> -					  i, dev) {
>>> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
>>> -			if (ret)
>>> -				pr_err("Mapping reserved region failed\n");
>>> -		}
>>> -	}
>>> -
>>> -	iommu_prepare_isa();
>>
>> Why do you want to remove this segment of code?
> 
> This will only work if the lazy allocation of domains exists, these
> mappings will disappear once a default domain is attached to a device and
> then remade by iommu_group_create_direct_mappings. This code is redundant
> and removing it allows us to remove all the lazy allocation logic.

No exactly.

We need to setup the rmrr mapping before enabling dma remapping,
otherwise, there will be a window after dma remapping enabling and
rmrr getting mapped, during which people might see dma faults.

> 
> iommu_prepare_isa does need moving to get_resv_regions for its mappings to
> be applied, this will need some refactoring.

Yes, agree.

> 
>>
>>>     domains_done:
>>>   @@ -3580,53 +3440,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
>>>   	return iova_pfn;
>>>   }
>>>   -struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
>>> -{
>>> -	struct dmar_domain *domain, *tmp;
>>> -	struct dmar_rmrr_unit *rmrr;
>>> -	struct device *i_dev;
>>> -	int i, ret;
>>> -
>>> -	domain = find_domain(dev);
>>> -	if (domain)
>>> -		goto out;
>>> -
>>> -	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
>>> -	if (!domain)
>>> -		goto out;
>>> -
>>> -	/* We have a new domain - setup possible RMRRs for the device */
>>> -	rcu_read_lock();
>>> -	for_each_rmrr_units(rmrr) {
>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>> -					  i, i_dev) {
>>> -			if (i_dev != dev)
>>> -				continue;
>>> -
>>> -			ret = domain_prepare_identity_map(dev, domain,
>>> -							  rmrr->base_address,
>>> -							  rmrr->end_address);
>>> -			if (ret)
>>> -				dev_err(dev, "Mapping reserved region failed\n");
>>
>> We can't simply remove this segment of code, right? At least it should
>> be moved to the domain allocation interface.
> 
> iommu_group_create_direct_mappings will take care of these mappings, this
> code is not used once an externally managed domain(group domain) is
> attached to the device.

Yes, this seems to be duplicated even with lazy domain allocation.

Best regards,
Lu Baolu

> 
>>
>>> -		}
>>> -	}
>>> -	rcu_read_unlock();
>>> -
>>> -	tmp = set_domain_for_dev(dev, domain);
>>> -	if (!tmp || domain != tmp) {
>>> -		domain_exit(domain);
>>> -		domain = tmp;
>>> -	}
>>> -
>>> -out:
>>> -
>>> -	if (!domain)
>>> -		pr_err("Allocating domain for %s failed\n", dev_name(dev));
>>> -
>>> -
>>> -	return domain;
>>> -}
>>> -
>>>   /* Check if the dev needs to go through non-identity map and unmap process.*/
>>>   static int iommu_no_mapping(struct device *dev)
>>>   {
>>> @@ -3689,7 +3502,7 @@ static dma_addr_t __intel_map_page(struct device *dev, struct page *page,
>>>   	if (iommu_no_mapping(dev))
>>>   		return paddr;
>>>   -	domain = get_valid_domain_for_dev(dev);
>>> +	domain = find_domain(dev);
>>>   	if (!domain)
>>>   		return DMA_MAPPING_ERROR;
>>>   @@ -3753,7 +3566,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
>>>   		return;
>>>     	domain = find_domain(dev);
>>> -	BUG_ON(!domain);
>>> +	if (!domain)
>>> +		return;
>>>   
>>
>> This is not related to this patch. Let's do it in a separated patch.
>>
>>>   	iommu = domain_get_iommu(domain);
>>>   @@ -3899,7 +3713,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
>>>   	if (iommu_no_mapping(dev))
>>>   		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
>>>   -	domain = get_valid_domain_for_dev(dev);
>>> +	domain = find_domain(dev);
>>>   	if (!domain)
>>>   		return 0;
>>>   @@ -5377,9 +5191,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
>>>   	u64 ctx_lo;
>>>   	int ret;
>>>   -	domain = get_valid_domain_for_dev(sdev->dev);
>>> +	domain = find_domain(sdev->dev);
>>>   	if (!domain)
>>> -		return -EINVAL;
>>> +		return -ENOMEM;
>>
>> This is not related to this patch. Let's do it in a separated patch.
>>
>>>     	spin_lock_irqsave(&device_domain_lock, flags);
>>>   	spin_lock(&iommu->lock);
>>
>> Best regards,
>> Lu Baolu
> 
>
Benjamin Serebrin via iommu - March 6, 2019, 6:08 p.m.
Hey,

> On 6 Mar 2019, at 07:00, Lu Baolu <baolu.lu@linux.intel.com> wrote:
> 
> Hi,
> 
> On 3/5/19 7:46 PM, James Sewart wrote:
>> Hey Lu,
>>> On 5 Mar 2019, at 06:59, Lu Baolu <baolu.lu@linux.intel.com> wrote:
>>> 
>>> Hi,
>>> 
>>> It's hard for me to understand why do we remove the rmrr related
>>> code in this patch.
>> The RMRR code removed here requires the lazy allocation of domains to
>> exist, as it is run before iommu.c would assign IOMMU groups and attach a
>> domain. Before patch 3, removing this code would mean the RMRR regions are
>> never mapped for a domain: iommu.c will allocate a default domain for the
>> group that a device is about to be put in, it will attach the domain to
>> the device, then for each region returned by get_resv_regions it will
>> create an identity map, this is where the RMRRs are setup for the default
>> domain. >
>>> 
>>> And, now we have two places to hold a domain for a device: group and
>>> dev->info. We can consider to optimize the use of per device iommu
>>> arch data. This should be later work anyway.
>>> 
>>> More comments inline.
>>> 
>>> On 3/4/19 11:47 PM, James Sewart wrote:
>>>> The generic IOMMU code will allocate and attach a dma ops domain to each
>>>> device that comes online, replacing any lazy allocated domain. Removes
>>>> RMRR application at iommu init time as we won't have a domain attached
>>>> to any device. iommu.c will do this after attaching a device using
>>>> create_direct_mappings.
>>>> Signed-off-by: James Sewart <jamessewart@arista.com>
>>>> ---
>>>>  drivers/iommu/intel-iommu.c | 202 ++----------------------------------
>>>>  1 file changed, 8 insertions(+), 194 deletions(-)
>>>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>>>> index 71cd6bbfec05..282257e2628d 100644
>>>> --- a/drivers/iommu/intel-iommu.c
>>>> +++ b/drivers/iommu/intel-iommu.c
>>>> @@ -2595,118 +2595,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
>>>>  	return domain;
>>>>  }
>>>>  -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
>>>> -{
>>>> -	*(u16 *)opaque = alias;
>>>> -	return 0;
>>>> -}
>>>> -
>>>> -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
>>>> -{
>>>> -	struct device_domain_info *info = NULL;
>>>> -	struct dmar_domain *domain = NULL;
>>>> -	struct intel_iommu *iommu;
>>>> -	u16 dma_alias;
>>>> -	unsigned long flags;
>>>> -	u8 bus, devfn;
>>>> -
>>>> -	iommu = device_to_iommu(dev, &bus, &devfn);
>>>> -	if (!iommu)
>>>> -		return NULL;
>>>> -
>>>> -	if (dev_is_pci(dev)) {
>>>> -		struct pci_dev *pdev = to_pci_dev(dev);
>>>> -
>>>> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
>>>> -
>>>> -		spin_lock_irqsave(&device_domain_lock, flags);
>>>> -		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
>>>> -						      PCI_BUS_NUM(dma_alias),
>>>> -						      dma_alias & 0xff);
>>>> -		if (info) {
>>>> -			iommu = info->iommu;
>>>> -			domain = info->domain;
>>>> -		}
>>>> -		spin_unlock_irqrestore(&device_domain_lock, flags);
>>>> -
>>>> -		/* DMA alias already has a domain, use it */
>>>> -		if (info)
>>>> -			goto out;
>>>> -	}
>>>> -
>>>> -	/* Allocate and initialize new domain for the device */
>>>> -	domain = alloc_domain(0);
>>>> -	if (!domain)
>>>> -		return NULL;
>>>> -	if (domain_init(domain, iommu, gaw)) {
>>>> -		domain_exit(domain);
>>>> -		return NULL;
>>>> -	}
>>>> -
>>>> -out:
>>>> -
>>>> -	return domain;
>>>> -}
>>>> -
>>>> -static struct dmar_domain *set_domain_for_dev(struct device *dev,
>>>> -					      struct dmar_domain *domain)
>>>> -{
>>>> -	struct intel_iommu *iommu;
>>>> -	struct dmar_domain *tmp;
>>>> -	u16 req_id, dma_alias;
>>>> -	u8 bus, devfn;
>>>> -
>>>> -	iommu = device_to_iommu(dev, &bus, &devfn);
>>>> -	if (!iommu)
>>>> -		return NULL;
>>>> -
>>>> -	req_id = ((u16)bus << 8) | devfn;
>>>> -
>>>> -	if (dev_is_pci(dev)) {
>>>> -		struct pci_dev *pdev = to_pci_dev(dev);
>>>> -
>>>> -		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
>>>> -
>>>> -		/* register PCI DMA alias device */
>>>> -		if (req_id != dma_alias) {
>>>> -			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
>>>> -					dma_alias & 0xff, NULL, domain);
>>>> -
>>>> -			if (!tmp || tmp != domain)
>>>> -				return tmp;
>>>> -		}
>>>> -	}
>>>> -
>>>> -	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
>>>> -	if (!tmp || tmp != domain)
>>>> -		return tmp;
>>>> -
>>>> -	return domain;
>>>> -}
>>>> -
>>>> -static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
>>>> -{
>>>> -	struct dmar_domain *domain, *tmp;
>>>> -
>>>> -	domain = find_domain(dev);
>>>> -	if (domain)
>>>> -		goto out;
>>>> -
>>>> -	domain = find_or_alloc_domain(dev, gaw);
>>>> -	if (!domain)
>>>> -		goto out;
>>>> -
>>>> -	tmp = set_domain_for_dev(dev, domain);
>>>> -	if (!tmp || domain != tmp) {
>>>> -		domain_exit(domain);
>>>> -		domain = tmp;
>>>> -	}
>>>> -
>>>> -out:
>>>> -
>>>> -	return domain;
>>>> -}
>>>> -
>>>>  static int iommu_domain_identity_map(struct dmar_domain *domain,
>>>>  				     unsigned long long start,
>>>>  				     unsigned long long end)
>>>> @@ -2779,7 +2667,7 @@ static int iommu_prepare_identity_map(struct device *dev,
>>>>  	struct dmar_domain *domain;
>>>>  	int ret;
>>>>  -	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
>>>> +	domain = find_domain(dev);
>>>>  	if (!domain)
>>>>  		return -ENOMEM;
>>>>  @@ -3301,11 +3189,9 @@ static int copy_translation_tables(struct intel_iommu *iommu)
>>>>  static int __init init_dmars(void)
>>>>  {
>>>>  	struct dmar_drhd_unit *drhd;
>>>> -	struct dmar_rmrr_unit *rmrr;
>>>>  	bool copied_tables = false;
>>>> -	struct device *dev;
>>>>  	struct intel_iommu *iommu;
>>>> -	int i, ret;
>>>> +	int ret;
>>>>    	/*
>>>>  	 * for each drhd
>>>> @@ -3466,32 +3352,6 @@ static int __init init_dmars(void)
>>>>  			goto free_iommu;
>>>>  		}
>>>>  	}
>>>> -	/*
>>>> -	 * For each rmrr
>>>> -	 *   for each dev attached to rmrr
>>>> -	 *   do
>>>> -	 *     locate drhd for dev, alloc domain for dev
>>>> -	 *     allocate free domain
>>>> -	 *     allocate page table entries for rmrr
>>>> -	 *     if context not allocated for bus
>>>> -	 *           allocate and init context
>>>> -	 *           set present in root table for this bus
>>>> -	 *     init context with domain, translation etc
>>>> -	 *    endfor
>>>> -	 * endfor
>>>> -	 */
>>>> -	pr_info("Setting RMRR:\n");
>>>> -	for_each_rmrr_units(rmrr) {
>>>> -		/* some BIOS lists non-exist devices in DMAR table. */
>>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>>> -					  i, dev) {
>>>> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
>>>> -			if (ret)
>>>> -				pr_err("Mapping reserved region failed\n");
>>>> -		}
>>>> -	}
>>>> -
>>>> -	iommu_prepare_isa();
>>> 
>>> Why do you want to remove this segment of code?
>> This will only work if the lazy allocation of domains exists, these
>> mappings will disappear once a default domain is attached to a device and
>> then remade by iommu_group_create_direct_mappings. This code is redundant
>> and removing it allows us to remove all the lazy allocation logic.
> 
> No exactly.
> 
> We need to setup the rmrr mapping before enabling dma remapping,
> otherwise, there will be a window after dma remapping enabling and
> rmrr getting mapped, during which people might see dma faults.

Do you think this patch instead should be a refactoring to simplify this initial domain setup before the default domain takes over? It seems like duplicated effort to have both lazy allocated domains and externally managed domains. We could allocate a domain here for any devices with RMRR and call get_resv_regions to avoid duplicating RMRR loop.

> 
>> iommu_prepare_isa does need moving to get_resv_regions for its mappings to
>> be applied, this will need some refactoring.
> 
> Yes, agree.
> 
>>> 
>>>>    domains_done:
>>>>  @@ -3580,53 +3440,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
>>>>  	return iova_pfn;
>>>>  }
>>>>  -struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
>>>> -{
>>>> -	struct dmar_domain *domain, *tmp;
>>>> -	struct dmar_rmrr_unit *rmrr;
>>>> -	struct device *i_dev;
>>>> -	int i, ret;
>>>> -
>>>> -	domain = find_domain(dev);
>>>> -	if (domain)
>>>> -		goto out;
>>>> -
>>>> -	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
>>>> -	if (!domain)
>>>> -		goto out;
>>>> -
>>>> -	/* We have a new domain - setup possible RMRRs for the device */
>>>> -	rcu_read_lock();
>>>> -	for_each_rmrr_units(rmrr) {
>>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>>> -					  i, i_dev) {
>>>> -			if (i_dev != dev)
>>>> -				continue;
>>>> -
>>>> -			ret = domain_prepare_identity_map(dev, domain,
>>>> -							  rmrr->base_address,
>>>> -							  rmrr->end_address);
>>>> -			if (ret)
>>>> -				dev_err(dev, "Mapping reserved region failed\n");
>>> 
>>> We can't simply remove this segment of code, right? At least it should
>>> be moved to the domain allocation interface.
>> iommu_group_create_direct_mappings will take care of these mappings, this
>> code is not used once an externally managed domain(group domain) is
>> attached to the device.
> 
> Yes, this seems to be duplicated even with lazy domain allocation.
> 
> Best regards,
> Lu Baolu
> 
>>> 
>>>> -		}
>>>> -	}
>>>> -	rcu_read_unlock();
>>>> -
>>>> -	tmp = set_domain_for_dev(dev, domain);
>>>> -	if (!tmp || domain != tmp) {
>>>> -		domain_exit(domain);
>>>> -		domain = tmp;
>>>> -	}
>>>> -
>>>> -out:
>>>> -
>>>> -	if (!domain)
>>>> -		pr_err("Allocating domain for %s failed\n", dev_name(dev));
>>>> -
>>>> -
>>>> -	return domain;
>>>> -}
>>>> -
>>>>  /* Check if the dev needs to go through non-identity map and unmap process.*/
>>>>  static int iommu_no_mapping(struct device *dev)
>>>>  {
>>>> @@ -3689,7 +3502,7 @@ static dma_addr_t __intel_map_page(struct device *dev, struct page *page,
>>>>  	if (iommu_no_mapping(dev))
>>>>  		return paddr;
>>>>  -	domain = get_valid_domain_for_dev(dev);
>>>> +	domain = find_domain(dev);
>>>>  	if (!domain)
>>>>  		return DMA_MAPPING_ERROR;
>>>>  @@ -3753,7 +3566,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
>>>>  		return;
>>>>    	domain = find_domain(dev);
>>>> -	BUG_ON(!domain);
>>>> +	if (!domain)
>>>> +		return;
>>>>  
>>> 
>>> This is not related to this patch. Let's do it in a separated patch.
>>> 
>>>>  	iommu = domain_get_iommu(domain);
>>>>  @@ -3899,7 +3713,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
>>>>  	if (iommu_no_mapping(dev))
>>>>  		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
>>>>  -	domain = get_valid_domain_for_dev(dev);
>>>> +	domain = find_domain(dev);
>>>>  	if (!domain)
>>>>  		return 0;
>>>>  @@ -5377,9 +5191,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
>>>>  	u64 ctx_lo;
>>>>  	int ret;
>>>>  -	domain = get_valid_domain_for_dev(sdev->dev);
>>>> +	domain = find_domain(sdev->dev);
>>>>  	if (!domain)
>>>> -		return -EINVAL;
>>>> +		return -ENOMEM;
>>> 
>>> This is not related to this patch. Let's do it in a separated patch.
>>> 
>>>>    	spin_lock_irqsave(&device_domain_lock, flags);
>>>>  	spin_lock(&iommu->lock);
>>> 
>>> Best regards,
>>> Lu Baolu

Cheers,
James.
Lu Baolu - March 7, 2019, 6:31 a.m.
Hi James,

On 3/7/19 2:08 AM, James Sewart wrote:
>>>>> -	/*
>>>>> -	 * For each rmrr
>>>>> -	 *   for each dev attached to rmrr
>>>>> -	 *   do
>>>>> -	 *     locate drhd for dev, alloc domain for dev
>>>>> -	 *     allocate free domain
>>>>> -	 *     allocate page table entries for rmrr
>>>>> -	 *     if context not allocated for bus
>>>>> -	 *           allocate and init context
>>>>> -	 *           set present in root table for this bus
>>>>> -	 *     init context with domain, translation etc
>>>>> -	 *    endfor
>>>>> -	 * endfor
>>>>> -	 */
>>>>> -	pr_info("Setting RMRR:\n");
>>>>> -	for_each_rmrr_units(rmrr) {
>>>>> -		/* some BIOS lists non-exist devices in DMAR table. */
>>>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>>>> -					  i, dev) {
>>>>> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
>>>>> -			if (ret)
>>>>> -				pr_err("Mapping reserved region failed\n");
>>>>> -		}
>>>>> -	}
>>>>> -
>>>>> -	iommu_prepare_isa();
>>>> Why do you want to remove this segment of code?
>>> This will only work if the lazy allocation of domains exists, these
>>> mappings will disappear once a default domain is attached to a device and
>>> then remade by iommu_group_create_direct_mappings. This code is redundant
>>> and removing it allows us to remove all the lazy allocation logic.
>> No exactly.
>>
>> We need to setup the rmrr mapping before enabling dma remapping,
>> otherwise, there will be a window after dma remapping enabling and
>> rmrr getting mapped, during which people might see dma faults.
> Do you think this patch instead should be a refactoring to simplify this initial domain setup before the default domain takes over? It seems like duplicated effort to have both lazy allocated domains and externally managed domains. We could allocate a domain here for any devices with RMRR and call get_resv_regions to avoid duplicating RMRR loop.
> 

Agree. We should replace the lazy allocated domains with default domain
in a clean way. Actually, your patches look great to me. But I do think
we need further cleanups. The rmrr code is one example, and the identity
domain (si_domain) is another.

Do you mind if I work on top of your patches for further cleanups and
sign off a v2 together with you?

Best regards,
Lu Baolu
Benjamin Serebrin via iommu - March 7, 2019, 10:21 a.m.
Hey Lu,

> On 7 Mar 2019, at 06:31, Lu Baolu <baolu.lu@linux.intel.com> wrote:
> 
> Hi James,
> 
> On 3/7/19 2:08 AM, James Sewart wrote:
>>>>>> -	/*
>>>>>> -	 * For each rmrr
>>>>>> -	 *   for each dev attached to rmrr
>>>>>> -	 *   do
>>>>>> -	 *     locate drhd for dev, alloc domain for dev
>>>>>> -	 *     allocate free domain
>>>>>> -	 *     allocate page table entries for rmrr
>>>>>> -	 *     if context not allocated for bus
>>>>>> -	 *           allocate and init context
>>>>>> -	 *           set present in root table for this bus
>>>>>> -	 *     init context with domain, translation etc
>>>>>> -	 *    endfor
>>>>>> -	 * endfor
>>>>>> -	 */
>>>>>> -	pr_info("Setting RMRR:\n");
>>>>>> -	for_each_rmrr_units(rmrr) {
>>>>>> -		/* some BIOS lists non-exist devices in DMAR table. */
>>>>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>>>>> -					  i, dev) {
>>>>>> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
>>>>>> -			if (ret)
>>>>>> -				pr_err("Mapping reserved region failed\n");
>>>>>> -		}
>>>>>> -	}
>>>>>> -
>>>>>> -	iommu_prepare_isa();
>>>>> Why do you want to remove this segment of code?
>>>> This will only work if the lazy allocation of domains exists, these
>>>> mappings will disappear once a default domain is attached to a device and
>>>> then remade by iommu_group_create_direct_mappings. This code is redundant
>>>> and removing it allows us to remove all the lazy allocation logic.
>>> No exactly.
>>> 
>>> We need to setup the rmrr mapping before enabling dma remapping,
>>> otherwise, there will be a window after dma remapping enabling and
>>> rmrr getting mapped, during which people might see dma faults.
>> Do you think this patch instead should be a refactoring to simplify this initial domain setup before the default domain takes over? It seems like duplicated effort to have both lazy allocated domains and externally managed domains. We could allocate a domain here for any devices with RMRR and call get_resv_regions to avoid duplicating RMRR loop.
> 
> Agree. We should replace the lazy allocated domains with default domain
> in a clean way. Actually, your patches look great to me. But I do think
> we need further cleanups. The rmrr code is one example, and the identity
> domain (si_domain) is another.
> 
> Do you mind if I work on top of your patches for further cleanups and
> sign off a v2 together with you?

Sure, sounds good. I’ll fixup patch 3 and have a go at integrating 
iommu_prepare_isa into get_resv_regions. This should make the initial 
domain logic here quite concise.

> 
> Best regards,
> Lu Baolu

Cheers,
James.
Lu Baolu - March 8, 2019, 1:09 a.m.
Hi,

On 3/7/19 6:21 PM, James Sewart wrote:
> Hey Lu,
> 
>> On 7 Mar 2019, at 06:31, Lu Baolu <baolu.lu@linux.intel.com> wrote:
>>
>> Hi James,
>>
>> On 3/7/19 2:08 AM, James Sewart wrote:
>>>>>>> -	/*
>>>>>>> -	 * For each rmrr
>>>>>>> -	 *   for each dev attached to rmrr
>>>>>>> -	 *   do
>>>>>>> -	 *     locate drhd for dev, alloc domain for dev
>>>>>>> -	 *     allocate free domain
>>>>>>> -	 *     allocate page table entries for rmrr
>>>>>>> -	 *     if context not allocated for bus
>>>>>>> -	 *           allocate and init context
>>>>>>> -	 *           set present in root table for this bus
>>>>>>> -	 *     init context with domain, translation etc
>>>>>>> -	 *    endfor
>>>>>>> -	 * endfor
>>>>>>> -	 */
>>>>>>> -	pr_info("Setting RMRR:\n");
>>>>>>> -	for_each_rmrr_units(rmrr) {
>>>>>>> -		/* some BIOS lists non-exist devices in DMAR table. */
>>>>>>> -		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
>>>>>>> -					  i, dev) {
>>>>>>> -			ret = iommu_prepare_rmrr_dev(rmrr, dev);
>>>>>>> -			if (ret)
>>>>>>> -				pr_err("Mapping reserved region failed\n");
>>>>>>> -		}
>>>>>>> -	}
>>>>>>> -
>>>>>>> -	iommu_prepare_isa();
>>>>>> Why do you want to remove this segment of code?
>>>>> This will only work if the lazy allocation of domains exists, these
>>>>> mappings will disappear once a default domain is attached to a device and
>>>>> then remade by iommu_group_create_direct_mappings. This code is redundant
>>>>> and removing it allows us to remove all the lazy allocation logic.
>>>> No exactly.
>>>>
>>>> We need to setup the rmrr mapping before enabling dma remapping,
>>>> otherwise, there will be a window after dma remapping enabling and
>>>> rmrr getting mapped, during which people might see dma faults.
>>> Do you think this patch instead should be a refactoring to simplify this initial domain setup before the default domain takes over? It seems like duplicated effort to have both lazy allocated domains and externally managed domains. We could allocate a domain here for any devices with RMRR and call get_resv_regions to avoid duplicating RMRR loop.
>>
>> Agree. We should replace the lazy allocated domains with default domain
>> in a clean way. Actually, your patches look great to me. But I do think
>> we need further cleanups. The rmrr code is one example, and the identity
>> domain (si_domain) is another.
>>
>> Do you mind if I work on top of your patches for further cleanups and
>> sign off a v2 together with you?
> 
> Sure, sounds good. I’ll fixup patch 3 and have a go at integrating
> iommu_prepare_isa into get_resv_regions. This should make the initial
> domain logic here quite concise.

Very appreciated! Thank you!

Best regards,
Lu Baolu

Patch

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 71cd6bbfec05..282257e2628d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2595,118 +2595,6 @@  static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	return domain;
 }
 
-static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
-{
-	*(u16 *)opaque = alias;
-	return 0;
-}
-
-static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
-{
-	struct device_domain_info *info = NULL;
-	struct dmar_domain *domain = NULL;
-	struct intel_iommu *iommu;
-	u16 dma_alias;
-	unsigned long flags;
-	u8 bus, devfn;
-
-	iommu = device_to_iommu(dev, &bus, &devfn);
-	if (!iommu)
-		return NULL;
-
-	if (dev_is_pci(dev)) {
-		struct pci_dev *pdev = to_pci_dev(dev);
-
-		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
-		spin_lock_irqsave(&device_domain_lock, flags);
-		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
-						      PCI_BUS_NUM(dma_alias),
-						      dma_alias & 0xff);
-		if (info) {
-			iommu = info->iommu;
-			domain = info->domain;
-		}
-		spin_unlock_irqrestore(&device_domain_lock, flags);
-
-		/* DMA alias already has a domain, use it */
-		if (info)
-			goto out;
-	}
-
-	/* Allocate and initialize new domain for the device */
-	domain = alloc_domain(0);
-	if (!domain)
-		return NULL;
-	if (domain_init(domain, iommu, gaw)) {
-		domain_exit(domain);
-		return NULL;
-	}
-
-out:
-
-	return domain;
-}
-
-static struct dmar_domain *set_domain_for_dev(struct device *dev,
-					      struct dmar_domain *domain)
-{
-	struct intel_iommu *iommu;
-	struct dmar_domain *tmp;
-	u16 req_id, dma_alias;
-	u8 bus, devfn;
-
-	iommu = device_to_iommu(dev, &bus, &devfn);
-	if (!iommu)
-		return NULL;
-
-	req_id = ((u16)bus << 8) | devfn;
-
-	if (dev_is_pci(dev)) {
-		struct pci_dev *pdev = to_pci_dev(dev);
-
-		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
-		/* register PCI DMA alias device */
-		if (req_id != dma_alias) {
-			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
-					dma_alias & 0xff, NULL, domain);
-
-			if (!tmp || tmp != domain)
-				return tmp;
-		}
-	}
-
-	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
-	if (!tmp || tmp != domain)
-		return tmp;
-
-	return domain;
-}
-
-static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
-{
-	struct dmar_domain *domain, *tmp;
-
-	domain = find_domain(dev);
-	if (domain)
-		goto out;
-
-	domain = find_or_alloc_domain(dev, gaw);
-	if (!domain)
-		goto out;
-
-	tmp = set_domain_for_dev(dev, domain);
-	if (!tmp || domain != tmp) {
-		domain_exit(domain);
-		domain = tmp;
-	}
-
-out:
-
-	return domain;
-}
-
 static int iommu_domain_identity_map(struct dmar_domain *domain,
 				     unsigned long long start,
 				     unsigned long long end)
@@ -2779,7 +2667,7 @@  static int iommu_prepare_identity_map(struct device *dev,
 	struct dmar_domain *domain;
 	int ret;
 
-	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	domain = find_domain(dev);
 	if (!domain)
 		return -ENOMEM;
 
@@ -3301,11 +3189,9 @@  static int copy_translation_tables(struct intel_iommu *iommu)
 static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
-	struct dmar_rmrr_unit *rmrr;
 	bool copied_tables = false;
-	struct device *dev;
 	struct intel_iommu *iommu;
-	int i, ret;
+	int ret;
 
 	/*
 	 * for each drhd
@@ -3466,32 +3352,6 @@  static int __init init_dmars(void)
 			goto free_iommu;
 		}
 	}
-	/*
-	 * For each rmrr
-	 *   for each dev attached to rmrr
-	 *   do
-	 *     locate drhd for dev, alloc domain for dev
-	 *     allocate free domain
-	 *     allocate page table entries for rmrr
-	 *     if context not allocated for bus
-	 *           allocate and init context
-	 *           set present in root table for this bus
-	 *     init context with domain, translation etc
-	 *    endfor
-	 * endfor
-	 */
-	pr_info("Setting RMRR:\n");
-	for_each_rmrr_units(rmrr) {
-		/* some BIOS lists non-exist devices in DMAR table. */
-		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
-					  i, dev) {
-			ret = iommu_prepare_rmrr_dev(rmrr, dev);
-			if (ret)
-				pr_err("Mapping reserved region failed\n");
-		}
-	}
-
-	iommu_prepare_isa();
 
 domains_done:
 
@@ -3580,53 +3440,6 @@  static unsigned long intel_alloc_iova(struct device *dev,
 	return iova_pfn;
 }
 
-struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
-{
-	struct dmar_domain *domain, *tmp;
-	struct dmar_rmrr_unit *rmrr;
-	struct device *i_dev;
-	int i, ret;
-
-	domain = find_domain(dev);
-	if (domain)
-		goto out;
-
-	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-	if (!domain)
-		goto out;
-
-	/* We have a new domain - setup possible RMRRs for the device */
-	rcu_read_lock();
-	for_each_rmrr_units(rmrr) {
-		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
-					  i, i_dev) {
-			if (i_dev != dev)
-				continue;
-
-			ret = domain_prepare_identity_map(dev, domain,
-							  rmrr->base_address,
-							  rmrr->end_address);
-			if (ret)
-				dev_err(dev, "Mapping reserved region failed\n");
-		}
-	}
-	rcu_read_unlock();
-
-	tmp = set_domain_for_dev(dev, domain);
-	if (!tmp || domain != tmp) {
-		domain_exit(domain);
-		domain = tmp;
-	}
-
-out:
-
-	if (!domain)
-		pr_err("Allocating domain for %s failed\n", dev_name(dev));
-
-
-	return domain;
-}
-
 /* Check if the dev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct device *dev)
 {
@@ -3689,7 +3502,7 @@  static dma_addr_t __intel_map_page(struct device *dev, struct page *page,
 	if (iommu_no_mapping(dev))
 		return paddr;
 
-	domain = get_valid_domain_for_dev(dev);
+	domain = find_domain(dev);
 	if (!domain)
 		return DMA_MAPPING_ERROR;
 
@@ -3753,7 +3566,8 @@  static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 		return;
 
 	domain = find_domain(dev);
-	BUG_ON(!domain);
+	if (!domain)
+		return;
 
 	iommu = domain_get_iommu(domain);
 
@@ -3899,7 +3713,7 @@  static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
 	if (iommu_no_mapping(dev))
 		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
 
-	domain = get_valid_domain_for_dev(dev);
+	domain = find_domain(dev);
 	if (!domain)
 		return 0;
 
@@ -5377,9 +5191,9 @@  int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
 	u64 ctx_lo;
 	int ret;
 
-	domain = get_valid_domain_for_dev(sdev->dev);
+	domain = find_domain(sdev->dev);
 	if (!domain)
-		return -EINVAL;
+		return -ENOMEM;
 
 	spin_lock_irqsave(&device_domain_lock, flags);
 	spin_lock(&iommu->lock);