diff mbox series

[06/10] devlink: convert snapshot id getter to return an error

Message ID 20200324223445.2077900-7-jacob.e.keller@intel.com
State Superseded
Delegated to: David Miller
Headers show
Series implement DEVLINK_CMD_REGION_NEW | expand

Commit Message

Jacob Keller March 24, 2020, 10:34 p.m. UTC
Modify the devlink_snapshot_id_get function to return a signed value,
enabling reporting an error on failure.

This enables easily refactoring how IDs are generated and kept track of
in the future. For now, just report ENOSPC once INT_MAX snapshot ids
have been returned.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/crdump.c | 10 +++++++---
 drivers/net/netdevsim/dev.c                 |  7 +++++--
 include/net/devlink.h                       |  2 +-
 net/core/devlink.c                          | 16 +++++++++++-----
 4 files changed, 24 insertions(+), 11 deletions(-)

Comments

Jakub Kicinski March 25, 2020, 6:04 p.m. UTC | #1
On Tue, 24 Mar 2020 15:34:41 -0700 Jacob Keller wrote:
> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
> index f7621ccb7b88..f9420b77e5fd 100644
> --- a/drivers/net/netdevsim/dev.c
> +++ b/drivers/net/netdevsim/dev.c
> @@ -45,8 +45,7 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>  {
>  	struct nsim_dev *nsim_dev = file->private_data;
>  	void *dummy_data;
> -	int err;
> -	u32 id;
> +	int err, id;
>  
>  	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
>  	if (!dummy_data)
> @@ -55,6 +54,10 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>  	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
>  
>  	id = devlink_region_snapshot_id_get(priv_to_devlink(nsim_dev));
> +	if (id < 0) {
> +		pr_err("Failed to get snapshot id\n");
> +		return id;
> +	}
>  	err = devlink_region_snapshot_create(nsim_dev->dummy_region,
>  					     dummy_data, id);
>  	if (err) {

Hmm... next patch introduces some ref counting on the ID AFAICT,
should there be some form of snapshot_id_put(), once the driver is 
done creating the regions it wants?

First what if driver wants to create two snapshots with the same ID but
user space manages to delete the first one before second one is created.

Second what if create fails, won't the snapshot ID just stay in XA with
count of 0 forever?
Jiri Pirko March 25, 2020, 7:13 p.m. UTC | #2
Wed, Mar 25, 2020 at 07:04:25PM CET, kuba@kernel.org wrote:
>On Tue, 24 Mar 2020 15:34:41 -0700 Jacob Keller wrote:
>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>> index f7621ccb7b88..f9420b77e5fd 100644
>> --- a/drivers/net/netdevsim/dev.c
>> +++ b/drivers/net/netdevsim/dev.c
>> @@ -45,8 +45,7 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>>  {
>>  	struct nsim_dev *nsim_dev = file->private_data;
>>  	void *dummy_data;
>> -	int err;
>> -	u32 id;
>> +	int err, id;
>>  
>>  	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
>>  	if (!dummy_data)
>> @@ -55,6 +54,10 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>>  	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
>>  
>>  	id = devlink_region_snapshot_id_get(priv_to_devlink(nsim_dev));
>> +	if (id < 0) {
>> +		pr_err("Failed to get snapshot id\n");
>> +		return id;
>> +	}
>>  	err = devlink_region_snapshot_create(nsim_dev->dummy_region,
>>  					     dummy_data, id);
>>  	if (err) {
>
>Hmm... next patch introduces some ref counting on the ID AFAICT,
>should there be some form of snapshot_id_put(), once the driver is 
>done creating the regions it wants?
>
>First what if driver wants to create two snapshots with the same ID but

>user space manages to delete the first one before second one is created.
>
>Second what if create fails, won't the snapshot ID just stay in XA with
>count of 0 forever?

Yeah, that seems like a race condition this is introducing.
Jacob Keller March 26, 2020, 1:33 a.m. UTC | #3
On 3/25/2020 11:04 AM, Jakub Kicinski wrote:
> On Tue, 24 Mar 2020 15:34:41 -0700 Jacob Keller wrote:
>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>> index f7621ccb7b88..f9420b77e5fd 100644
>> --- a/drivers/net/netdevsim/dev.c
>> +++ b/drivers/net/netdevsim/dev.c
>> @@ -45,8 +45,7 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>>  {
>>  	struct nsim_dev *nsim_dev = file->private_data;
>>  	void *dummy_data;
>> -	int err;
>> -	u32 id;
>> +	int err, id;
>>  
>>  	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
>>  	if (!dummy_data)
>> @@ -55,6 +54,10 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>>  	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
>>  
>>  	id = devlink_region_snapshot_id_get(priv_to_devlink(nsim_dev));
>> +	if (id < 0) {
>> +		pr_err("Failed to get snapshot id\n");
>> +		return id;
>> +	}
>>  	err = devlink_region_snapshot_create(nsim_dev->dummy_region,
>>  					     dummy_data, id);
>>  	if (err) {
> 
> Hmm... next patch introduces some ref counting on the ID AFAICT,
> should there be some form of snapshot_id_put(), once the driver is 
> done creating the regions it wants?
> 
> First what if driver wants to create two snapshots with the same ID but
> user space manages to delete the first one before second one is created.
> 
> Second what if create fails, won't the snapshot ID just stay in XA with
> count of 0 forever?
> 

Ah, yep good catch. I'll add a _put* function and make drivers call this.

Thanks,
Jake
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index c3f90c0f9554..723a66efdf32 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -169,7 +169,7 @@  int mlx4_crdump_collect(struct mlx4_dev *dev)
 	struct pci_dev *pdev = dev->persist->pdev;
 	unsigned long cr_res_size;
 	u8 __iomem *cr_space;
-	u32 id;
+	int id;
 
 	if (!dev->caps.health_buffer_addrs) {
 		mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
@@ -189,10 +189,14 @@  int mlx4_crdump_collect(struct mlx4_dev *dev)
 		return -ENODEV;
 	}
 
-	crdump_enable_crspace_access(dev, cr_space);
-
 	/* Get the available snapshot ID for the dumps */
 	id = devlink_region_snapshot_id_get(devlink);
+	if (id < 0) {
+		mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", id);
+		return id;
+	}
+
+	crdump_enable_crspace_access(dev, cr_space);
 
 	/* Try to capture dumps */
 	mlx4_crdump_collect_crspace(dev, cr_space, id);
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index f7621ccb7b88..f9420b77e5fd 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -45,8 +45,7 @@  static ssize_t nsim_dev_take_snapshot_write(struct file *file,
 {
 	struct nsim_dev *nsim_dev = file->private_data;
 	void *dummy_data;
-	int err;
-	u32 id;
+	int err, id;
 
 	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
 	if (!dummy_data)
@@ -55,6 +54,10 @@  static ssize_t nsim_dev_take_snapshot_write(struct file *file,
 	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
 
 	id = devlink_region_snapshot_id_get(priv_to_devlink(nsim_dev));
+	if (id < 0) {
+		pr_err("Failed to get snapshot id\n");
+		return id;
+	}
 	err = devlink_region_snapshot_create(nsim_dev->dummy_region,
 					     dummy_data, id);
 	if (err) {
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8869ad75b965..df9f6ddf6c66 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -976,7 +976,7 @@  devlink_region_create(struct devlink *devlink,
 		      const struct devlink_region_ops *ops,
 		      u32 region_max_snapshots, u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
-u32 devlink_region_snapshot_id_get(struct devlink *devlink);
+int devlink_region_snapshot_id_get(struct devlink *devlink);
 int devlink_region_snapshot_create(struct devlink_region *region,
 				   u8 *data, u32 snapshot_id);
 int devlink_info_serial_number_put(struct devlink_info_req *req,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6dc14eb2a5f7..62a8566e9851 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3772,12 +3772,16 @@  static void devlink_nl_region_notify(struct devlink_region *region,
  *	__devlink_region_snapshot_id_get - get snapshot ID
  *	@devlink: devlink instance
  *
- *	Returns a new snapshot id. Must be called while holding the
- *	devlink instance lock.
+ *	Returns a new snapshot id or a negative error code on failure. Must be
+ *	called while holding the devlink instance lock.
  */
-static u32 __devlink_region_snapshot_id_get(struct devlink *devlink)
+static int __devlink_region_snapshot_id_get(struct devlink *devlink)
 {
 	lockdep_assert_held(&devlink->lock);
+
+	if (devlink->snapshot_id >= INT_MAX)
+		return -ENOSPC;
+
 	return ++devlink->snapshot_id;
 }
 
@@ -7781,11 +7785,13 @@  EXPORT_SYMBOL_GPL(devlink_region_destroy);
  *	Driver should use the same id for multiple snapshots taken
  *	on multiple regions at the same time/by the same trigger.
  *
+ *	Returns a positive id or a negative error code on failure.
+ *
  *	@devlink: devlink
  */
-u32 devlink_region_snapshot_id_get(struct devlink *devlink)
+int devlink_region_snapshot_id_get(struct devlink *devlink)
 {
-	u32 id;
+	int id;
 
 	mutex_lock(&devlink->lock);
 	id = __devlink_region_snapshot_id_get(devlink);