Patchwork [v5,4/7] s390: ap: setup relation between KVM and mediated device

login
register
mail settings
Submitter Pierre Morel
Date March 13, 2019, 4:05 p.m.
Message ID <1552493104-30510-5-git-send-email-pmorel@linux.ibm.com>
Download mbox | patch
Permalink /patch/748255/
State New
Headers show

Comments

Pierre Morel - March 13, 2019, 4:05 p.m.
When the mediated device is opened we set up the relation with KVM, and unset
it when the mediated device is released.

We ensure KVM is present on opening of the mediated device.

We ensure that KVM survives the mediated device, and establish a direct
link from KVM to the mediated device to simplify the relationship.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 80 ++++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 34 deletions(-)
Halil Pasic - March 15, 2019, 6:15 p.m.
On Wed, 13 Mar 2019 17:05:01 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> When the mediated device is open we setup the relation with KVM unset it
> when the mediated device is released.
> 
> We ensure KVM is present on opening of the mediated device.
> 
> We ensure that KVM survives the mediated device, and establish a direct

survives? 

> link from KVM to the mediated device to simplify the relationship.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>  drivers/s390/crypto/vfio_ap_ops.c | 80 ++++++++++++++++++++++-----------------
>  1 file changed, 46 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 0f8952c23..6b559ca 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -790,7 +790,6 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>   * vfio_ap_mdev_set_kvm
>   *
>   * @matrix_mdev: a mediated matrix device
> - * @kvm: reference to KVM instance
>   *
>   * Verifies no other mediated matrix device has @kvm and sets a reference to
>   * it in @matrix_mdev->kvm.
> @@ -798,53 +797,39 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>   * Return 0 if no other mediated matrix device has a reference to @kvm;
>   * otherwise, returns an -EPERM.
>   */
> -static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> -				struct kvm *kvm)
> +static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev)
>  {
> -	struct ap_matrix_mdev *m;
> -
>  	mutex_lock(&matrix_dev->lock);
> +	if (matrix_mdev->kvm->arch.crypto.pqap_hook)
> +		goto err_unlock;
>  
> -	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
> -		if ((m != matrix_mdev) && (m->kvm == kvm)) {
> -			mutex_unlock(&matrix_dev->lock);
> -			return -EPERM;
> -		}
> -	}
> +	if (!matrix_mdev->kvm->arch.crypto.crycbd)
> +		goto err_unlock;
>  
> -	matrix_mdev->kvm = kvm;
> -	mutex_unlock(&matrix_dev->lock);
> +	matrix_mdev->kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
>  
> +	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
> +				  matrix_mdev->matrix.aqm,
> +				  matrix_mdev->matrix.adm);
> +	kvm_get_kvm(matrix_mdev->kvm);
> +	mutex_unlock(&matrix_dev->lock);
>  	return 0;
> +
> +err_unlock:
> +	mutex_unlock(&matrix_dev->lock);
> +	return -EPERM;
>  }
>  
>  static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>  				       unsigned long action, void *data)
>  {
> -	int ret;
>  	struct ap_matrix_mdev *matrix_mdev;
>  
>  	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>  		return NOTIFY_OK;
>  
>  	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
> -
> -	if (!data) {
> -		matrix_mdev->kvm = NULL;
> -		return NOTIFY_OK;
> -	}
> -
> -	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
> -	if (ret)
> -		return NOTIFY_DONE;
> -
> -	/* If there is no CRYCB pointer, then we can't copy the masks */
> -	if (!matrix_mdev->kvm->arch.crypto.crycbd)
> -		return NOTIFY_DONE;
> -
> -	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
> -				  matrix_mdev->matrix.aqm,
> -				  matrix_mdev->matrix.adm);
> +	matrix_mdev->kvm = data;
>  
>  	return NOTIFY_OK;
>  }
> @@ -888,6 +873,12 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>  	if (ret)
>  		goto err_group;
>  
> +	/* We do not support opening the mediated device without KVM */
> +	if (!matrix_mdev->kvm) {
> +		ret = -ENODEV;
> +		goto err_group;
> +	}
> +
>  	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
>  	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
>  
> @@ -896,8 +887,15 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>  	if (ret)
>  		goto err_iommu;
>  
> +	ret = vfio_ap_mdev_set_kvm(matrix_mdev);

At this point the matrix_mdev->kvm ain't guaranteed to be valid IMHO. Or
am I wrong? If I'm right kvm_get_kvm(matrix_mdev->kvm) could be too late.

> +	if (ret)
> +		goto err_kvm;
> +
>  	return 0;
>  
> +err_kvm:
> +	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +				 &matrix_mdev->iommu_notifier);
>  err_iommu:
>  	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
>  				 &matrix_mdev->group_notifier);
> @@ -906,19 +904,33 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>  	return ret;
>  }
>  
> -static void vfio_ap_mdev_release(struct mdev_device *mdev)
> +static int vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
>  {
> -	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> +	struct kvm *kvm = matrix_mdev->kvm;
>  
>  	if (matrix_mdev->kvm)
>  		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);

This still conditional?

> -
> +	vfio_ap_mdev_reset_queues(matrix_mdev->mdev);

I guess your intention was to move vfio_ap_mdev_reset_queues()
here from vfio_ap_mdev_release(), but you still have a
vfio_ap_mdev_reset_queues() call in vfio_ap_mdev_release().

> +	matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>  	matrix_mdev->kvm = NULL;
> +
> +	kvm_put_kvm(kvm);
> +	return 0;
> +}
> +
> +static void vfio_ap_mdev_release(struct mdev_device *mdev)
> +{
> +	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> +
> +	mutex_lock(&matrix_dev->lock);
> +
>  	vfio_ap_mdev_reset_queues(mdev);

Here.

Regards,
Halil

> +	vfio_ap_mdev_unset_kvm(matrix_mdev);
>  	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>  				 &matrix_mdev->iommu_notifier);
>  	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
>  				 &matrix_mdev->group_notifier);
> +	mutex_unlock(&matrix_dev->lock);
>  	module_put(THIS_MODULE);
>  }
>
Pierre Morel - March 19, 2019, 9:38 a.m.
On 15/03/2019 19:15, Halil Pasic wrote:
> On Wed, 13 Mar 2019 17:05:01 +0100
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>> When the mediated device is open we setup the relation with KVM unset it
>> when the mediated device is released.
>>
>> We ensure KVM is present on opening of the mediated device.
>>
>> We ensure that KVM survives the mediated device, and establish a direct
> 
> survives?

what alternative do you prefer?

> 
>> link from KVM to the mediated device to simplify the relationship.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---

...snip...

>>   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>   				       unsigned long action, void *data)
>>   {
>> -	int ret;
>>   	struct ap_matrix_mdev *matrix_mdev;
>>   
>>   	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>>   		return NOTIFY_OK;
>>   
>>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>> -
>> -	if (!data) {
>> -		matrix_mdev->kvm = NULL;
>> -		return NOTIFY_OK;
>> -	}
>> -
>> -	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
>> -	if (ret)
>> -		return NOTIFY_DONE;
>> -
>> -	/* If there is no CRYCB pointer, then we can't copy the masks */
>> -	if (!matrix_mdev->kvm->arch.crypto.crycbd)
>> -		return NOTIFY_DONE;
>> -
>> -	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
>> -				  matrix_mdev->matrix.aqm,
>> -				  matrix_mdev->matrix.adm);
>> +	matrix_mdev->kvm = data;
>>   
>>   	return NOTIFY_OK;
>>   }
>> @@ -888,6 +873,12 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>>   	if (ret)
>>   		goto err_group;
>>   
>> +	/* We do not support opening the mediated device without KVM */
>> +	if (!matrix_mdev->kvm) {
>> +		ret = -ENODEV;
>> +		goto err_group;
>> +	}
>> +
>>   	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
>>   	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
>>   
>> @@ -896,8 +887,15 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>>   	if (ret)
>>   		goto err_iommu;
>>   
>> +	ret = vfio_ap_mdev_set_kvm(matrix_mdev);
> 
> At this point the matrix_mdev->kvm ain't guaranteed to be valid IMHO. Or
> am I wrong? If I'm right kvm_get_kvm(matrix_mdev->kvm) could be too late.

What about the if (!matrix_mdev->kvm) 10 lines above ?

> 
>> +	if (ret)
>> +		goto err_kvm;
>> +
>>   	return 0;
>>   
>> +err_kvm:
>> +	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>> +				 &matrix_mdev->iommu_notifier);
>>   err_iommu:
>>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
>>   				 &matrix_mdev->group_notifier);
>> @@ -906,19 +904,33 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>>   	return ret;
>>   }
>>   
>> -static void vfio_ap_mdev_release(struct mdev_device *mdev)
>> +static int vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
>>   {
>> -	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>> +	struct kvm *kvm = matrix_mdev->kvm;
>>   
>>   	if (matrix_mdev->kvm)
>>   		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> 
> This still conditional?

Yes, nothing to clear if there is no KVM.

> 
>> -
>> +	vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> 
> I guess your intention was to move vfio_ap_mdev_reset_queues()
> here from vfio_ap_mdev_release(), but you still have a
> vfio_ap_mdev_reset_queues() call in vfio_ap_mdev_release().
> 
>> +	matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>>   	matrix_mdev->kvm = NULL;
>> +
>> +	kvm_put_kvm(kvm);
>> +	return 0;
>> +}
>> +
>> +static void vfio_ap_mdev_release(struct mdev_device *mdev)
>> +{
>> +	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>> +
>> +	mutex_lock(&matrix_dev->lock);
>> +
>>   	vfio_ap_mdev_reset_queues(mdev);

right, this one will go away.
Thanks for reviewing.

Regards,
Pierre
Halil Pasic - March 19, 2019, 11:54 a.m.
On Tue, 19 Mar 2019 10:38:42 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> On 15/03/2019 19:15, Halil Pasic wrote:
> > On Wed, 13 Mar 2019 17:05:01 +0100
> > Pierre Morel <pmorel@linux.ibm.com> wrote:
> > 
> >> When the mediated device is open we setup the relation with KVM unset it
> >> when the mediated device is released.
> >>
> >> We ensure KVM is present on opening of the mediated device.
> >>
> >> We ensure that KVM survives the mediated device, and establish a direct
> > 
> > survives?
> 
> what alternative do you prefer?
> 

Increase kvm's refcount to ensure the guest is alive when the
ap_matrix_mdev is active. An ap_matrix_mdev becomes active with
a successful open() and ceases to be active with a release().

Your sentence was materially wrong as the mdev is allowed to outlive
the KVM. BTW survive tends to have an 'in spite of' note to it, which
outlive does not. vfio-ap is, I hope, not a calamity that threatens
the life of KVM ;). https://en.oxforddictionaries.com/definition/survive

> > 
> >> link from KVM to the mediated device to simplify the relationship.
> >>
> >> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> >> ---
> 
> ...snip...
> 
> >>   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> >>   				       unsigned long action, void *data)
> >>   {
> >> -	int ret;
> >>   	struct ap_matrix_mdev *matrix_mdev;
> >>   
> >>   	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
> >>   		return NOTIFY_OK;
> >>   
> >>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
> >> -
> >> -	if (!data) {
> >> -		matrix_mdev->kvm = NULL;
> >> -		return NOTIFY_OK;
> >> -	}
> >> -
> >> -	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
> >> -	if (ret)
> >> -		return NOTIFY_DONE;
> >> -
> >> -	/* If there is no CRYCB pointer, then we can't copy the masks */
> >> -	if (!matrix_mdev->kvm->arch.crypto.crycbd)
> >> -		return NOTIFY_DONE;
> >> -
> >> -	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
> >> -				  matrix_mdev->matrix.aqm,
> >> -				  matrix_mdev->matrix.adm);
> >> +	matrix_mdev->kvm = data;
> >>   
> >>   	return NOTIFY_OK;
> >>   }
> >> @@ -888,6 +873,12 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
> >>   	if (ret)
> >>   		goto err_group;
> >>   
> >> +	/* We do not support opening the mediated device without KVM */
> >> +	if (!matrix_mdev->kvm) {
> >> +		ret = -ENODEV;
> >> +		goto err_group;
> >> +	}
> >> +
> >>   	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
> >>   	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> >>   
> >> @@ -896,8 +887,15 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
> >>   	if (ret)
> >>   		goto err_iommu;
> >>   
> >> +	ret = vfio_ap_mdev_set_kvm(matrix_mdev);
> > 
> > At this point the matrix_mdev->kvm ain't guaranteed to be valid IMHO. Or
> > am I wrong? If I'm right kvm_get_kvm(matrix_mdev->kvm) could be too late.
> 
> What about the if (!matrix_mdev->kvm) 10 lines above ?
> 

That check is not sufficient.

You should do the kvm_get_kvm() in vfio_ap_mdev_group_notifier(). VFIO
must ensure that the kvm pointer you get is valid, in a sense that it
points to a valid struct kvm and the kvm object is alive, while you are
in the callback. But not beyond.

If another thread were to decrement the refcount of the kvm object you
would end up with matrix_mdev->kvm pointing to an object that has already
died.

Does my analysis make sense to you?

> > 
> >> +	if (ret)
> >> +		goto err_kvm;
> >> +
> >>   	return 0;
> >>   
> >> +err_kvm:
> >> +	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> >> +				 &matrix_mdev->iommu_notifier);
> >>   err_iommu:
> >>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
> >>   				 &matrix_mdev->group_notifier);
> >> @@ -906,19 +904,33 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
> >>   	return ret;
> >>   }
> >>   
> >> -static void vfio_ap_mdev_release(struct mdev_device *mdev)
> >> +static int vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
> >>   {
> >> -	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> >> +	struct kvm *kvm = matrix_mdev->kvm;
> >>   
> >>   	if (matrix_mdev->kvm)
> >>   		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> > 
> > This still conditional?
> 
> Yes, nothing to clear if there is no KVM.
> 

Since we have ensured the open only works if there is a KVM at that
point in time, and we have taken a reference to KVM, I would expect
KVM can not go away before we give up our reference.

> > 
> >> -
> >> +	vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> > 
> > I guess your intention was to move vfio_ap_mdev_reset_queues()
> > here from vfio_ap_mdev_release(), but you still have a
> > vfio_ap_mdev_reset_queues() call in vfio_ap_mdev_release().
> > 
> >> +	matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> >>   	matrix_mdev->kvm = NULL;
> >> +
> >> +	kvm_put_kvm(kvm);
> >> +	return 0;
> >> +}
> >> +
> >> +static void vfio_ap_mdev_release(struct mdev_device *mdev)
> >> +{
> >> +	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> >> +
> >> +	mutex_lock(&matrix_dev->lock);
> >> +
> >>   	vfio_ap_mdev_reset_queues(mdev);
> 
> right, this one will go away.
> Thanks for reviewing.
> 

yw

Regards,
Halil
Pierre Morel - March 19, 2019, 2:23 p.m.
On 19/03/2019 12:54, Halil Pasic wrote:
> On Tue, 19 Mar 2019 10:38:42 +0100
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>> On 15/03/2019 19:15, Halil Pasic wrote:
>>> On Wed, 13 Mar 2019 17:05:01 +0100
>>> Pierre Morel <pmorel@linux.ibm.com> wrote:
>>>
>>>> When the mediated device is open we setup the relation with KVM unset it
>>>> when the mediated device is released.
>>>>
>>>> We ensure KVM is present on opening of the mediated device.
>>>>
>>>> We ensure that KVM survives the mediated device, and establish a direct
>>>
>>> survives?
>>
>> what alternative do you prefer?
>>
> 
> Increase kvm's refcount to ensure the guest is alive when the
> ap_matrix_mdev is active. An ap mp_matrix becomes active with
> a successful open() and ceases to be active with a release().

Right, it is mdev usage not mdev.

> 
> Your sentence was materially wrong as the mdev is allowed to outlive
> the KVM. BTW survive tends to have an 'in spite of' note to it, which
> outlive does not. vfio-ap is, I hope, not a calamity that threatens
> the life of KVM ;). https://en.oxforddictionaries.com/definition/survive

Thanks, your description is much better.

> 
>>>
>>>> link from KVM to the mediated device to simplify the relationship.
>>>>
>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>> ---
>>
>> ...snip...
>>
>>>>    static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>>>    				       unsigned long action, void *data)
>>>>    {
>>>> -	int ret;
>>>>    	struct ap_matrix_mdev *matrix_mdev;
>>>>    
>>>>    	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>>>>    		return NOTIFY_OK;
>>>>    
>>>>    	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>>>> -
>>>> -	if (!data) {
>>>> -		matrix_mdev->kvm = NULL;
>>>> -		return NOTIFY_OK;
>>>> -	}
>>>> -
>>>> -	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
>>>> -	if (ret)
>>>> -		return NOTIFY_DONE;
>>>> -
>>>> -	/* If there is no CRYCB pointer, then we can't copy the masks */
>>>> -	if (!matrix_mdev->kvm->arch.crypto.crycbd)
>>>> -		return NOTIFY_DONE;
>>>> -
>>>> -	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
>>>> -				  matrix_mdev->matrix.aqm,
>>>> -				  matrix_mdev->matrix.adm);
>>>> +	matrix_mdev->kvm = data;
>>>>    
>>>>    	return NOTIFY_OK;
>>>>    }
>>>> @@ -888,6 +873,12 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>>>>    	if (ret)
>>>>    		goto err_group;
>>>>    
>>>> +	/* We do not support opening the mediated device without KVM */
>>>> +	if (!matrix_mdev->kvm) {
>>>> +		ret = -ENODEV;
>>>> +		goto err_group;
>>>> +	}
>>>> +
>>>>    	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
>>>>    	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
>>>>    
>>>> @@ -896,8 +887,15 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>>>>    	if (ret)
>>>>    		goto err_iommu;
>>>>    
>>>> +	ret = vfio_ap_mdev_set_kvm(matrix_mdev);
>>>
>>> At this point the matrix_mdev->kvm ain't guaranteed to be valid IMHO. Or
>>> am I wrong? If I'm right kvm_get_kvm(matrix_mdev->kvm) could be too late.
>>
>> What about the if (!matrix_mdev->kvm) 10 lines above ?
>>
> 
> That check is not sufficient.
> 
> You should do the kvm_get_kvm() in vfio_ap_mdev_group_notifier(). VFIO
> must ensure that the kvm pointer you get is valid, in a sense that it
> points to a valid struct kvm and the kvm object is alive, while you are
> in the callback. But not beyond.
> 
> If another thread were to decrement the refcount of the kvm object you
> would end up with matrix_mdev->kvm pointing to an object that has already
> died.
> 
> Does my analysis make sense to you?

Yes, thanks, the explanation is good; it would have been worth getting it
the first time.

> 
>>>
>>>> +	if (ret)
>>>> +		goto err_kvm;
>>>> +
>>>>    	return 0;
>>>>    
>>>> +err_kvm:
>>>> +	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>>>> +				 &matrix_mdev->iommu_notifier);
>>>>    err_iommu:
>>>>    	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
>>>>    				 &matrix_mdev->group_notifier);
>>>> @@ -906,19 +904,33 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
>>>>    	return ret;
>>>>    }
>>>>    
>>>> -static void vfio_ap_mdev_release(struct mdev_device *mdev)
>>>> +static int vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
>>>>    {
>>>> -	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>>>> +	struct kvm *kvm = matrix_mdev->kvm;
>>>>    
>>>>    	if (matrix_mdev->kvm)
>>>>    		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>>
>>> This still conditional?
>>
>> Yes, nothing to clear if there is no KVM.
>>
> 
> Since we have ensured the open only works if there is a KVM at that
> point in time, and we have taken a reference to KVM, I would expect
> KVM can not go away before we give up our reference.

Right.


Thanks,
Pierre
Pierre Morel - March 19, 2019, 2:47 p.m.
On 19/03/2019 15:23, Pierre Morel wrote:
> On 19/03/2019 12:54, Halil Pasic wrote:
>> On Tue, 19 Mar 2019 10:38:42 +0100
>> Pierre Morel <pmorel@linux.ibm.com> wrote:
>>
>>> On 15/03/2019 19:15, Halil Pasic wrote:
>>>> On Wed, 13 Mar 2019 17:05:01 +0100
>>>> Pierre Morel <pmorel@linux.ibm.com> wrote:
>>>>
>>>>> When the mediated device is open we setup the relation with KVM 
>>>>> unset it
>>>>> when the mediated device is released.
>>>>>
>>>>> We ensure KVM is present on opening of the mediated device.
>>>>>
>>>>> We ensure that KVM survives the mediated device, and establish a 
>>>>> direct
>>>>
>>>> survives?
>>>
>>> what alternative do you prefer?
>>>
>>
>> Increase kvm's refcount to ensure the guest is alive when the
>> ap_matrix_mdev is active. An ap mp_matrix becomes active with
>> a successful open() and ceases to be active with a release().
> 
> Right, it is mdev usage not mdev.
> 
>>
>> Your sentence was materially wrong as the mdev is allowed to outlive
>> the KVM. BTW survive tends to have an 'in spite of' note to it, which
>> outlive does not. vfio-ap is, I hope, not a calamity that threatens
>> the life of KVM ;). https://en.oxforddictionaries.com/definition/survive
> 
> Thanks, your description is much better.
> 
>>
>>>>
>>>>> link from KVM to the mediated device to simplify the relationship.
>>>>>
>>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>>> ---
>>>
>>> ...snip...
>>>
>>>>>    static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>>>>                           unsigned long action, void *data)
>>>>>    {
>>>>> -    int ret;
>>>>>        struct ap_matrix_mdev *matrix_mdev;
>>>>>        if (action != VFIO_GROUP_NOTIFY_SET_KVM)
>>>>>            return NOTIFY_OK;
>>>>>        matrix_mdev = container_of(nb, struct ap_matrix_mdev, 
>>>>> group_notifier);
>>>>> -
>>>>> -    if (!data) {
>>>>> -        matrix_mdev->kvm = NULL;
>>>>> -        return NOTIFY_OK;
>>>>> -    }
>>>>> -
>>>>> -    ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
>>>>> -    if (ret)
>>>>> -        return NOTIFY_DONE;
>>>>> -
>>>>> -    /* If there is no CRYCB pointer, then we can't copy the masks */
>>>>> -    if (!matrix_mdev->kvm->arch.crypto.crycbd)
>>>>> -        return NOTIFY_DONE;
>>>>> -
>>>>> -    kvm_arch_crypto_set_masks(matrix_mdev->kvm, 
>>>>> matrix_mdev->matrix.apm,
>>>>> -                  matrix_mdev->matrix.aqm,
>>>>> -                  matrix_mdev->matrix.adm);
>>>>> +    matrix_mdev->kvm = data;
>>>>>        return NOTIFY_OK;
>>>>>    }
>>>>> @@ -888,6 +873,12 @@ static int vfio_ap_mdev_open(struct 
>>>>> mdev_device *mdev)
>>>>>        if (ret)
>>>>>            goto err_group;
>>>>> +    /* We do not support opening the mediated device without KVM */
>>>>> +    if (!matrix_mdev->kvm) {
>>>>> +        ret = -ENODEV;
>>>>> +        goto err_group;
>>>>> +    }
>>>>> +
>>>>>        matrix_mdev->iommu_notifier.notifier_call = 
>>>>> vfio_ap_mdev_iommu_notifier;
>>>>>        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
>>>>> @@ -896,8 +887,15 @@ static int vfio_ap_mdev_open(struct 
>>>>> mdev_device *mdev)
>>>>>        if (ret)
>>>>>            goto err_iommu;
>>>>> +    ret = vfio_ap_mdev_set_kvm(matrix_mdev);
>>>>
>>>> At this point the matrix_mdev->kvm ain't guaranteed to be valid 
>>>> IMHO. Or
>>>> am I wrong? If I'm right kvm_get_kvm(matrix_mdev->kvm) could be too 
>>>> late.
>>>
>>> What about the if (!matrix_mdev->kvm) 10 lines above ?
>>>
>>
>> That check is not sufficient.
>>
>> You should do the kvm_get_kvm() in vfio_ap_mdev_group_notifier(). VFIO
>> must ensure that the kvm pointer you get is valid, in a sense that it
>> points to a valid struct kvm and the kvm object is alive, while you are
>> in the callback. But not beyond.
>>
>> If another thread were to decrement the refcount of the kvm object you
>> would end up with matrix_mdev->kvm pointing to an object that has already
>> died.
>>
>> Does my analysis make sense to you?
> 
> Yes thanks the explication is good, it would have been worth to get it 
> the first time.
> 
>>
>>>>
>>>>> +    if (ret)
>>>>> +        goto err_kvm;
>>>>> +
>>>>>        return 0;
>>>>> +err_kvm:
>>>>> +    vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>>>>> +                 &matrix_mdev->iommu_notifier);
>>>>>    err_iommu:
>>>>>        vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
>>>>>                     &matrix_mdev->group_notifier);
>>>>> @@ -906,19 +904,33 @@ static int vfio_ap_mdev_open(struct 
>>>>> mdev_device *mdev)
>>>>>        return ret;
>>>>>    }
>>>>> -static void vfio_ap_mdev_release(struct mdev_device *mdev)
>>>>> +static int vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
>>>>>    {
>>>>> -    struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>>>>> +    struct kvm *kvm = matrix_mdev->kvm;
>>>>>        if (matrix_mdev->kvm)
>>>>>            kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>>>
>>>> This still conditional?
>>>
>>> Yes, nothing to clear if there is no KVM.
>>>
>>
>> Since we have ensured the open only works if there is a KVM at that
>> point in time, and we have taken a reference to KVM, I would expect
>> KVM can not go away before we give up our reference.
> 
> Right.

Right but based on the assumption we do a kvm_get_kvm() during open.

But now we will do it inside the notifier, so the logic is to do a 
kvm_put_kvm in the notifier too.
This is important because userland will ask us to release the KVM/VFIO 
link through this notifier.
So I will have to rework this part where KVM==NULL in the notifier too.

Regards,
Pierre
Halil Pasic - March 19, 2019, 3:27 p.m.
On Tue, 19 Mar 2019 15:47:05 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> >>>>>        if (matrix_mdev->kvm)
> >>>>>            kvm_arch_crypto_clear_masks(matrix_mdev->kvm);  
> >>>>
> >>>> This still conditional?  
> >>>
> >>> Yes, nothing to clear if there is no KVM.
> >>>  
> >>
> >> Since we have ensured the open only works if there is a KVM at that
> >> point in time, and we have taken a reference to KVM, I would expect
> >> KVM can not go away before we give up our reference.  
> > 
> > Right.  
> 
> Right but based on the assumption we do a kvm_get_kvm() during open.
> 
> But now we will do it inside the notifier, so the logic is to do a 
> kvm_put_kvm in the notifier too.
> This is important because userland will ask us to release the KVM/VFIO 
> link through this notifier.
> So I will have to rework this part where KVM==NULL in the notifier too.
> 
> Regards,
> Pierre

I think it can be done both ways. One option is to ensure KVM != NULL if the
open succeeds and to take the reference in the notifier. I suppose if open()
fails release() won't be called. But the logic/code in open() would get
quite ugly because the callback could be called asynchronously, so that it
overlaps with the rest of open().

Not failing open() when no KVM is there yet is, in my opinion,
cleaner anyway.

Regards,
Halil
Pierre Morel - March 19, 2019, 4:48 p.m.
On 19/03/2019 16:27, Halil Pasic wrote:
> On Tue, 19 Mar 2019 15:47:05 +0100
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>>>>>>>         if (matrix_mdev->kvm)
>>>>>>>             kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>>>>>
>>>>>> This still conditional?
>>>>>
>>>>> Yes, nothing to clear if there is no KVM.
>>>>>   
>>>>
>>>> Since we have ensured the open only works if there is a KVM at that
>>>> point in time, and we have taken a reference to KVM, I would expect
>>>> KVM can not go away before we give up our reference.
>>>
>>> Right.
>>
>> Right but based on the assumption we do a kvm_get_kvm() during open.
>>
>> But now we will do it inside the notifier, so the logic is to do a
>> kvm_put_kvm in the notifier too.
>> This is important because userland will ask us to release the KVM/VFIO
>> link through this notifier.
>> So I will have to rework this part where KVM==NULL in the notifier too.
>>
>> Regards,
>> Pierre
> 
> I think it can be done both ways. If you ensure KVM != NULL if the open
> succeeds and take the reference in the notifier. I suppose if open()
> fails release() won't be called. But the logic/code in open() would get
> quite ugly because the callback could be called assync so that it
> overlaps with the rest of open().

Not necessary, but there is more than just the kvm_get_kvm().

When the user calls KVM_DEV_VFIO_GROUP_DEL he asks to break the link 
between VFIO and KVM.

Currently we just ignore this instead of stopping all activity 
associated with KVM.

But we have more bugs there:
We should not support multiple opens of the mdev, which would overwrite
matrix_mdev->kvm for the same mdev with a different KVM.
I send a bugfix for this.


> 
> Not failing open() in case of no KVM is there yet is in my opinion
> cleaner anyway.

If we correctly handle the notifiers and the exclusivity, we can do this.
I will do this correctly in the next iteration.

Regards,
Pierre

> 
> Regards,
> Halil
>

Patch

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 0f8952c23..6b559ca 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -790,7 +790,6 @@  static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
  * vfio_ap_mdev_set_kvm
  *
  * @matrix_mdev: a mediated matrix device
- * @kvm: reference to KVM instance
  *
  * Verifies no other mediated matrix device has @kvm and sets a reference to
  * it in @matrix_mdev->kvm.
@@ -798,53 +797,39 @@  static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
  * Return 0 if no other mediated matrix device has a reference to @kvm;
  * otherwise, returns an -EPERM.
  */
-static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
-				struct kvm *kvm)
+static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev)
 {
-	struct ap_matrix_mdev *m;
-
 	mutex_lock(&matrix_dev->lock);
+	if (matrix_mdev->kvm->arch.crypto.pqap_hook)
+		goto err_unlock;
 
-	list_for_each_entry(m, &matrix_dev->mdev_list, node) {
-		if ((m != matrix_mdev) && (m->kvm == kvm)) {
-			mutex_unlock(&matrix_dev->lock);
-			return -EPERM;
-		}
-	}
+	if (!matrix_mdev->kvm->arch.crypto.crycbd)
+		goto err_unlock;
 
-	matrix_mdev->kvm = kvm;
-	mutex_unlock(&matrix_dev->lock);
+	matrix_mdev->kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
 
+	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
+				  matrix_mdev->matrix.aqm,
+				  matrix_mdev->matrix.adm);
+	kvm_get_kvm(matrix_mdev->kvm);
+	mutex_unlock(&matrix_dev->lock);
 	return 0;
+
+err_unlock:
+	mutex_unlock(&matrix_dev->lock);
+	return -EPERM;
 }
 
 static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
 				       unsigned long action, void *data)
 {
-	int ret;
 	struct ap_matrix_mdev *matrix_mdev;
 
 	if (action != VFIO_GROUP_NOTIFY_SET_KVM)
 		return NOTIFY_OK;
 
 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
-
-	if (!data) {
-		matrix_mdev->kvm = NULL;
-		return NOTIFY_OK;
-	}
-
-	ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
-	if (ret)
-		return NOTIFY_DONE;
-
-	/* If there is no CRYCB pointer, then we can't copy the masks */
-	if (!matrix_mdev->kvm->arch.crypto.crycbd)
-		return NOTIFY_DONE;
-
-	kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
-				  matrix_mdev->matrix.aqm,
-				  matrix_mdev->matrix.adm);
+	matrix_mdev->kvm = data;
 
 	return NOTIFY_OK;
 }
@@ -888,6 +873,12 @@  static int vfio_ap_mdev_open(struct mdev_device *mdev)
 	if (ret)
 		goto err_group;
 
+	/* We do not support opening the mediated device without KVM */
+	if (!matrix_mdev->kvm) {
+		ret = -ENODEV;
+		goto err_group;
+	}
+
 	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
 	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 
@@ -896,8 +887,15 @@  static int vfio_ap_mdev_open(struct mdev_device *mdev)
 	if (ret)
 		goto err_iommu;
 
+	ret = vfio_ap_mdev_set_kvm(matrix_mdev);
+	if (ret)
+		goto err_kvm;
+
 	return 0;
 
+err_kvm:
+	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+				 &matrix_mdev->iommu_notifier);
 err_iommu:
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
 				 &matrix_mdev->group_notifier);
@@ -906,19 +904,33 @@  static int vfio_ap_mdev_open(struct mdev_device *mdev)
 	return ret;
 }
 
-static void vfio_ap_mdev_release(struct mdev_device *mdev)
+static int vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
 {
-	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+	struct kvm *kvm = matrix_mdev->kvm;
 
 	if (matrix_mdev->kvm)
 		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
-
+	vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
+	matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
 	matrix_mdev->kvm = NULL;
+
+	kvm_put_kvm(kvm);
+	return 0;
+}
+
+static void vfio_ap_mdev_release(struct mdev_device *mdev)
+{
+	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
+
+	mutex_lock(&matrix_dev->lock);
+
 	vfio_ap_mdev_reset_queues(mdev);
+	vfio_ap_mdev_unset_kvm(matrix_mdev);
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
 				 &matrix_mdev->iommu_notifier);
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
 				 &matrix_mdev->group_notifier);
+	mutex_unlock(&matrix_dev->lock);
 	module_put(THIS_MODULE);
 }