[RFC,1/3] devlink: add flash update command

Message ID 20190211065923.22670-2-jakub.kicinski@netronome.com
State RFC
Delegated to: David Miller
Headers show
Series
  • devlink: add the ability to update device flash
Related show

Commit Message

Jakub Kicinski Feb. 11, 2019, 6:59 a.m.
Add devlink flash update command. Advanced NICs have firmware
stored in flash and often cryptographically secured. Updating
that flash is handled by management firmware. Ethtool has a
flash update command which served us well, however, it has two
shortcomings:
 - it takes rtnl_lock unnecessarily - really flash update has
   nothing to do with networking, so using a networking device
   as a handle is suboptimal, which leads us to the second one:
 - it requires a functioning netdev - in case device enters an
   error state and can't spawn a netdev (e.g. communication
   with the device fails) there is no netdev to use as a handle
   for flashing.

Devlink already has the ability to report the firmware versions,
now with the ability to update the firmware/flash we will be
able to recover devices in bad state.

To enable easy interoperability with ethtool add the target
partition ID. We may or may not add a different method of
identification, but there is no such immediate need.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/devlink.h        |  2 ++
 include/uapi/linux/devlink.h |  6 ++++++
 net/core/devlink.c           | 30 ++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+)

Comments

Jiri Pirko Feb. 11, 2019, 4:45 p.m. | #1
Mon, Feb 11, 2019 at 07:59:20AM CET, jakub.kicinski@netronome.com wrote:
>Add devlink flash update command. Advanced NICs have firmware
>stored in flash and often cryptographically secured. Updating
>that flash is handled by management firmware. Ethtool has a
>flash update command which served us well, however, it has two
>shortcomings:
> - it takes rtnl_lock unnecessarily - really flash update has
>   nothing to do with networking, so using a networking device
>   as a handle is suboptimal, which leads us to the second one:
> - it requires a functioning netdev - in case device enters an
>   error state and can't spawn a netdev (e.g. communication
>   with the device fails) there is no netdev to use as a handle
>   for flashing.
>
>Devlink already has the ability to report the firmware versions,
>now with the ability to update the firmware/flash we will be
>able to recover devices in bad state.
>
>To enable easy interoperability with ethtool add the target
>partition ID. We may or may not add a different method of
>identification, but there is no such immediate need.
>
>Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
>---
> include/net/devlink.h        |  2 ++
> include/uapi/linux/devlink.h |  6 ++++++
> net/core/devlink.c           | 30 ++++++++++++++++++++++++++++++
> 3 files changed, 38 insertions(+)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 07660fe4c0e3..55b3478b1291 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -529,6 +529,8 @@ struct devlink_ops {
> 				      struct netlink_ext_ack *extack);
> 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
> 			struct netlink_ext_ack *extack);
>+	int (*flash_update)(struct devlink *devlink, const char *path,
>+			    u32 target, struct netlink_ext_ack *extack);
> };
> 
> static inline void *devlink_priv(struct devlink *devlink)
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index 72d9f7c89190..f4417283fd1b 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -103,6 +103,8 @@ enum devlink_command {
> 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
> 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
> 
>+	DEVLINK_CMD_FLASH_UPDATE,
>+
> 	/* add new commands above here */
> 	__DEVLINK_CMD_MAX,
> 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
>@@ -326,6 +328,10 @@ enum devlink_attr {
> 	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
> 	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
> 	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
>+
>+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
>+	DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID,	/* u32 */

Do we need to carry this on? I mean, the ef->region is only checked in 4
drivers against ETHTOOL_FLASH_ALL_REGIONS, which is default.
There is this bnxt driver which is the only one working with this value.
I think that for compat, there should be some id-region mapping
provided by driver which the compat layer would use to translate.

I also think that this should be in sync with what is returned in
DEVLINK_ATTR_INFO_VERSION_NAME.

For example:
$ devlink dev info pci/0000:82:00.0
pci/0000:82:00.0:
  driver nfp
  serial_number 16240145
  versions:
      fixed:
        board.id AMDA0081-0001
        board.rev 15
        board.vendor SMA
        board.model hydrogen
      running:
        fw.mgmt 010181.010181.0101d4
        fw.cpld 0x1030000
        fw.app abm-d372b6
        fw.undi 0.0.2
        chip.init AMDA-0081-0001  20160318164536
      stored:
        fw.mgmt 010181.010181.0101d4
        fw.app abm-d372b6
        fw.undi 0.0.2
        chip.init AMDA-0081-0001  20160318164536

Now user should be able to use one of the identifiers to flash relevant
fw, like:

devlink dev flash pci/0000:82:00.0 XXX fw.mgmt file flash-boot.bin

I'm not sure about the name of "xxx" attribute. Maybe "id":

devlink dev flash pci/0000:82:00.0 id fw.mgmt file flash-boot.bin
devlink dev flash pci/0000:82:00.0 id fw.cpld file some-other.bin



[...]
Jakub Kicinski Feb. 11, 2019, 7:25 p.m. | #2
On Mon, 11 Feb 2019 17:45:13 +0100, Jiri Pirko wrote:
> Mon, Feb 11, 2019 at 07:59:20AM CET, jakub.kicinski@netronome.com wrote:
> >Add devlink flash update command. Advanced NICs have firmware
> >stored in flash and often cryptographically secured. Updating
> >that flash is handled by management firmware. Ethtool has a
> >flash update command which served us well, however, it has two
> >shortcomings:
> > - it takes rtnl_lock unnecessarily - really flash update has
> >   nothing to do with networking, so using a networking device
> >   as a handle is suboptimal, which leads us to the second one:
> > - it requires a functioning netdev - in case device enters an
> >   error state and can't spawn a netdev (e.g. communication
> >   with the device fails) there is no netdev to use as a handle
> >   for flashing.
> >
> >Devlink already has the ability to report the firmware versions,
> >now with the ability to update the firmware/flash we will be
> >able to recover devices in bad state.
> >
> >To enable easy interoperability with ethtool add the target
> >partition ID. We may or may not add a different method of
> >identification, but there is no such immediate need.
> >
> >Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> >---
> > include/net/devlink.h        |  2 ++
> > include/uapi/linux/devlink.h |  6 ++++++
> > net/core/devlink.c           | 30 ++++++++++++++++++++++++++++++
> > 3 files changed, 38 insertions(+)
> >
> >diff --git a/include/net/devlink.h b/include/net/devlink.h
> >index 07660fe4c0e3..55b3478b1291 100644
> >--- a/include/net/devlink.h
> >+++ b/include/net/devlink.h
> >@@ -529,6 +529,8 @@ struct devlink_ops {
> > 				      struct netlink_ext_ack *extack);
> > 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
> > 			struct netlink_ext_ack *extack);
> >+	int (*flash_update)(struct devlink *devlink, const char *path,
> >+			    u32 target, struct netlink_ext_ack *extack);
> > };
> > 
> > static inline void *devlink_priv(struct devlink *devlink)
> >diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
> >index 72d9f7c89190..f4417283fd1b 100644
> >--- a/include/uapi/linux/devlink.h
> >+++ b/include/uapi/linux/devlink.h
> >@@ -103,6 +103,8 @@ enum devlink_command {
> > 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
> > 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
> > 
> >+	DEVLINK_CMD_FLASH_UPDATE,
> >+
> > 	/* add new commands above here */
> > 	__DEVLINK_CMD_MAX,
> > 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
> >@@ -326,6 +328,10 @@ enum devlink_attr {
> > 	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
> > 	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
> > 	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
> >+
> >+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
> >+	DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID,	/* u32 */  
> 
> Do we need to carry this on? I mean, the ef->region is only checked in 4
> drivers against ETHTOOL_FLASH_ALL_REGIONS, which is default.
> There is this bnxt driver which is the only one working with this value.
> I think that for compat, there should be some id-region mapping
> provided by driver which the compat layer would use to translate.
> 
> I also think that this should be in sync with what is returned in
> DEVLINK_ATTR_INFO_VERSION_NAME.
> 
> For example:
> $ devlink dev info pci/0000:82:00.0
> pci/0000:82:00.0:
>   driver nfp
>   serial_number 16240145
>   versions:
>       fixed:
>         board.id AMDA0081-0001
>         board.rev 15
>         board.vendor SMA
>         board.model hydrogen
>       running:
>         fw.mgmt 010181.010181.0101d4
>         fw.cpld 0x1030000
>         fw.app abm-d372b6
>         fw.undi 0.0.2
>         chip.init AMDA-0081-0001  20160318164536
>       stored:
>         fw.mgmt 010181.010181.0101d4
>         fw.app abm-d372b6
>         fw.undi 0.0.2
>         chip.init AMDA-0081-0001  20160318164536
> 
> Now user should be able to use one of the identifiers to flash relevant
> fw, like:
> 
> devlink dev flash pci/0000:82:00.0 XXX fw.mgmt file flash-boot.bin
> 
> I'm not sure about the name of "xxx" attribute. Maybe "id":
> 
> devlink dev flash pci/0000:82:00.0 id fw.mgmt file flash-boot.bin
> devlink dev flash pci/0000:82:00.0 id fw.cpld file some-other.bin

Agreed, that looks good!  TBH in case of Netronome the binary
image contains an identifier so it will update the correct component
automatically.  That's why I say "no immediate need" :)  (How about
"component" instead of "id", BTW?)

I will drop the target ID, I just added it for full backward compat
with ethtool, but it may be confusing, given it would be mostly unused.
I'll drop it in non-RFC, do you want me to add the id/component or leave
it out for now?
Jiri Pirko Feb. 12, 2019, 7:26 a.m. | #3
Mon, Feb 11, 2019 at 08:25:30PM CET, jakub.kicinski@netronome.com wrote:
>On Mon, 11 Feb 2019 17:45:13 +0100, Jiri Pirko wrote:
>> Mon, Feb 11, 2019 at 07:59:20AM CET, jakub.kicinski@netronome.com wrote:
>> >Add devlink flash update command. Advanced NICs have firmware
>> >stored in flash and often cryptographically secured. Updating
>> >that flash is handled by management firmware. Ethtool has a
>> >flash update command which served us well, however, it has two
>> >shortcomings:
>> > - it takes rtnl_lock unnecessarily - really flash update has
>> >   nothing to do with networking, so using a networking device
>> >   as a handle is suboptimal, which leads us to the second one:
>> > - it requires a functioning netdev - in case device enters an
>> >   error state and can't spawn a netdev (e.g. communication
>> >   with the device fails) there is no netdev to use as a handle
>> >   for flashing.
>> >
>> >Devlink already has the ability to report the firmware versions,
>> >now with the ability to update the firmware/flash we will be
>> >able to recover devices in bad state.
>> >
>> >To enable easy interoperability with ethtool add the target
>> >partition ID. We may or may not add a different method of
>> >identification, but there is no such immediate need.
>> >
>> >Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
>> >---
>> > include/net/devlink.h        |  2 ++
>> > include/uapi/linux/devlink.h |  6 ++++++
>> > net/core/devlink.c           | 30 ++++++++++++++++++++++++++++++
>> > 3 files changed, 38 insertions(+)
>> >
>> >diff --git a/include/net/devlink.h b/include/net/devlink.h
>> >index 07660fe4c0e3..55b3478b1291 100644
>> >--- a/include/net/devlink.h
>> >+++ b/include/net/devlink.h
>> >@@ -529,6 +529,8 @@ struct devlink_ops {
>> > 				      struct netlink_ext_ack *extack);
>> > 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
>> > 			struct netlink_ext_ack *extack);
>> >+	int (*flash_update)(struct devlink *devlink, const char *path,
>> >+			    u32 target, struct netlink_ext_ack *extack);
>> > };
>> > 
>> > static inline void *devlink_priv(struct devlink *devlink)
>> >diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>> >index 72d9f7c89190..f4417283fd1b 100644
>> >--- a/include/uapi/linux/devlink.h
>> >+++ b/include/uapi/linux/devlink.h
>> >@@ -103,6 +103,8 @@ enum devlink_command {
>> > 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
>> > 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
>> > 
>> >+	DEVLINK_CMD_FLASH_UPDATE,
>> >+
>> > 	/* add new commands above here */
>> > 	__DEVLINK_CMD_MAX,
>> > 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
>> >@@ -326,6 +328,10 @@ enum devlink_attr {
>> > 	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
>> > 	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
>> > 	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
>> >+
>> >+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
>> >+	DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID,	/* u32 */  
>> 
>> Do we need to carry this on? I mean, the ef->region is only checked in 4
>> drivers against ETHTOOL_FLASH_ALL_REGIONS, which is default.
>> There is this bnxt driver which is the only one working with this value.
>> I think that for compat, there should be some id-region mapping
>> provided by driver which the compat layer would use to translate.
>> 
>> I also think that this should be in sync with what is returned in
>> DEVLINK_ATTR_INFO_VERSION_NAME.
>> 
>> For example:
>> $ devlink dev info pci/0000:82:00.0
>> pci/0000:82:00.0:
>>   driver nfp
>>   serial_number 16240145
>>   versions:
>>       fixed:
>>         board.id AMDA0081-0001
>>         board.rev 15
>>         board.vendor SMA
>>         board.model hydrogen
>>       running:
>>         fw.mgmt 010181.010181.0101d4
>>         fw.cpld 0x1030000
>>         fw.app abm-d372b6
>>         fw.undi 0.0.2
>>         chip.init AMDA-0081-0001  20160318164536
>>       stored:
>>         fw.mgmt 010181.010181.0101d4
>>         fw.app abm-d372b6
>>         fw.undi 0.0.2
>>         chip.init AMDA-0081-0001  20160318164536
>> 
>> Now user should be able to use one of the identifiers to flash relevant
>> fw, like:
>> 
>> devlink dev flash pci/0000:82:00.0 XXX fw.mgmt file flash-boot.bin
>> 
>> I'm not sure about the name of "xxx" attribute. Maybe "id":
>> 
>> devlink dev flash pci/0000:82:00.0 id fw.mgmt file flash-boot.bin
>> devlink dev flash pci/0000:82:00.0 id fw.cpld file some-other.bin
>
>Agreed, that looks good!  TBH in case of Netronome the binary
>image contains an identifier so it will update the correct component

Okay. So in case the "component" attr is omitted, there would be some
flag passed to the driver so it would know that the file contains more
component binaries and has to do parsing itself/in-fw.


>automatically.  That's why I say "no immediate need" :)  (How about
>"component" instead of "id", BTW?)

Component is fine by me.


>
>I will drop the target ID, I just added it for full backward compat
>with ethtool, but it may be confusing, given it would be mostly unused.

Ok.


>I'll drop it in non-RFC, do you want me to add the id/component or leave
>it out for now?

I think it would be good to add it and have the api complete.

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 07660fe4c0e3..55b3478b1291 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -529,6 +529,8 @@  struct devlink_ops {
 				      struct netlink_ext_ack *extack);
 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
 			struct netlink_ext_ack *extack);
+	int (*flash_update)(struct devlink *devlink, const char *path,
+			    u32 target, struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 72d9f7c89190..f4417283fd1b 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -103,6 +103,8 @@  enum devlink_command {
 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
 
+	DEVLINK_CMD_FLASH_UPDATE,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -326,6 +328,10 @@  enum devlink_attr {
 	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID,	/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 46c468a1f3dc..a4b5e194e33e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2660,6 +2660,27 @@  static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	return devlink->ops->reload(devlink, info->extack);
 }
 
+static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const char *file_name;
+	u32 target = 0;
+
+	if (!devlink->ops->flash_update)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
+		return -EINVAL;
+	file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
+
+	if (info->attrs[DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID])
+		target = nla_get_u32(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID]);
+
+	return devlink->ops->flash_update(devlink, file_name, target,
+					  info->extack);
+}
+
 static const struct devlink_param devlink_param_generic[] = {
 	{
 		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -4876,6 +4897,8 @@  static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_FLASH_UPDATE_TARGET_ID] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -5164,6 +5187,13 @@  static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
+	{
+		.cmd = DEVLINK_CMD_FLASH_UPDATE,
+		.doit = devlink_nl_cmd_flash_update,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {