diff mbox series

[net-next,v1,1/9] ethtool: Add support for configuring frame preemption

Message ID 20201202045325.3254757-2-vinicius.gomes@intel.com
State Awaiting Upstream
Headers show
Series ethtool: Add support for frame preemption | expand

Commit Message

Vinicius Costa Gomes Dec. 2, 2020, 4:53 a.m. UTC
Frame preemption (described in IEEE 802.3br-2016) defines the concept
of preemptible and express queues. It allows traffic from express
queues to "interrupt" traffic from preemptible queues, which are
"resumed" after the express traffic has finished transmitting.

Frame preemption can only be used when both the local device and the
link partner support it.

Only parameters for enabling/disabling frame preemption and
configuring the minimum fragment size are included here. Expressing
which queues are marked as preemptible is left to mqprio/taprio, as
having that information there should be easier on the user.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
 include/linux/ethtool.h              |  19 ++++
 include/uapi/linux/ethtool_netlink.h |  17 +++
 net/ethtool/Makefile                 |   2 +-
 net/ethtool/netlink.c                |  19 ++++
 net/ethtool/netlink.h                |   4 +
 net/ethtool/preempt.c                | 151 +++++++++++++++++++++++++++
 6 files changed, 211 insertions(+), 1 deletion(-)
 create mode 100644 net/ethtool/preempt.c

Comments

Jakub Kicinski Dec. 3, 2020, 1:53 a.m. UTC | #1
On Tue,  1 Dec 2020 20:53:17 -0800 Vinicius Costa Gomes wrote:
> Frame preemption (described in IEEE 802.3br-2016) defines the concept
> of preemptible and express queues. It allows traffic from express
> queues to "interrupt" traffic from preemptible queues, which are
> "resumed" after the express traffic has finished transmitting.
> 
> Frame preemption can only be used when both the local device and the
> link partner support it.
> 
> Only parameters for enabling/disabling frame preemption and
> configuring the minimum fragment size are included here. Expressing
> which queues are marked as preemptible is left to mqprio/taprio, as
> having that information there should be easier on the user.
> 
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>

CC: Michal
Jakub Kicinski Dec. 5, 2020, 5:43 p.m. UTC | #2
On Tue,  1 Dec 2020 20:53:17 -0800 Vinicius Costa Gomes wrote:
> Frame preemption (described in IEEE 802.3br-2016) defines the concept
> of preemptible and express queues. It allows traffic from express
> queues to "interrupt" traffic from preemptible queues, which are
> "resumed" after the express traffic has finished transmitting.
> 
> Frame preemption can only be used when both the local device and the
> link partner support it.
> 
> Only parameters for enabling/disabling frame preemption and
> configuring the minimum fragment size are included here. Expressing
> which queues are marked as preemptible is left to mqprio/taprio, as
> having that information there should be easier on the user.
> 
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
> ---
>  include/linux/ethtool.h              |  19 ++++
>  include/uapi/linux/ethtool_netlink.h |  17 +++
>  net/ethtool/Makefile                 |   2 +-
>  net/ethtool/netlink.c                |  19 ++++
>  net/ethtool/netlink.h                |   4 +
>  net/ethtool/preempt.c                | 151 +++++++++++++++++++++++++++
>  6 files changed, 211 insertions(+), 1 deletion(-)
>  create mode 100644 net/ethtool/preempt.c
> 
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index e3da25b51ae4..16d6ee29a6ac 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -263,6 +263,19 @@ struct ethtool_pause_stats {
>  	u64 rx_pause_frames;
>  };
>  
> +/**
> + * struct ethtool_fp - Frame Preemption information
> + *
> + * @enabled: Enable frame preemption.
> + *

The empty line between members seems unnecessary.

> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
> + * expressed in terms of X in '(1 + X)*64 + 4'

Is this way of expressing the min frag size from the standard?

> + */
> +struct ethtool_fp {
> +	u8 enabled;
> +	u8 min_frag_size_mult;
> +};

> +	int	(*get_preempt)(struct net_device *,
> +			       struct ethtool_fp *);
> +	int	(*set_preempt)(struct net_device *,
> +			       struct ethtool_fp *);

Since this is a new op we should probably pass extack to the drivers?

>  extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
>  extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
> @@ -375,6 +376,8 @@ extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +
>  extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
>  extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
>  extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
> +extern const struct nla_policy ethnl_preempt_get_policy[ETHTOOL_A_PREEMPT_MAX + 1];
> +extern const struct nla_policy ethnl_preempt_set_policy[ETHTOOL_A_PREEMPT_MAX + 1];

Let's make the size

ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT + 1

for set, and

ETHTOOL_A_PREEMPT_HEADER + 1

for get, like the other tables

> +const struct nla_policy
> +ethnl_preempt_get_policy[ETHTOOL_A_PREEMPT_MAX + 1] = {
> +	[ETHTOOL_A_PREEMPT_UNSPEC]		= { .type = NLA_REJECT },

Unnecessary, NLA_REJECT is 0.

> +	[ETHTOOL_A_PREEMPT_HEADER]		= { .type = NLA_NESTED },

Please specify nested policy

> +	[ETHTOOL_A_PREEMPT_ENABLED]		= { .type = NLA_REJECT },
> +	[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT]	= { .type = NLA_REJECT },

Unnecessary

> +const struct nla_policy
> +ethnl_preempt_set_policy[ETHTOOL_A_PREEMPT_MAX + 1] = {
> +	[ETHTOOL_A_PREEMPT_UNSPEC]			= { .type = NLA_REJECT },
> +	[ETHTOOL_A_PREEMPT_HEADER]			= { .type = NLA_NESTED },
> +	[ETHTOOL_A_PREEMPT_ENABLED]			= { .type = NLA_U8 },

Set the right netlink policy to check the value is <= 1.

> +	[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT]		= { .type = NLA_U8 },
> +};
Vinicius Costa Gomes Dec. 7, 2020, 10:11 p.m. UTC | #3
Jakub Kicinski <kuba@kernel.org> writes:

>> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
>> index e3da25b51ae4..16d6ee29a6ac 100644
>> --- a/include/linux/ethtool.h
>> +++ b/include/linux/ethtool.h
>> @@ -263,6 +263,19 @@ struct ethtool_pause_stats {
>>  	u64 rx_pause_frames;
>>  };
>>  
>> +/**
>> + * struct ethtool_fp - Frame Preemption information
>> + *
>> + * @enabled: Enable frame preemption.
>> + *
>
> The empty line between members seems unnecessary.

Will fix.

>
>> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
>> + * expressed in terms of X in '(1 + X)*64 + 4'
>
> Is this way of expressing the min frag size from the standard?
>

The standard has this: "A 2-bit integer value indicating, in units of 64
octets, the minimum number of octets over 64 octets required in
non-final fragments by the receiver" from IEEE 802.3br-2016, Table
79-7a.

>> + */
>> +struct ethtool_fp {
>> +	u8 enabled;
>> +	u8 min_frag_size_mult;
>> +};
>
>> +	int	(*get_preempt)(struct net_device *,
>> +			       struct ethtool_fp *);
>> +	int	(*set_preempt)(struct net_device *,
>> +			       struct ethtool_fp *);
>
> Since this is a new op we should probably pass extack to the drivers?

Yes. Will fix.

>
>>  extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
>>  extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
>> @@ -375,6 +376,8 @@ extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +
>>  extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
>>  extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
>>  extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
>> +extern const struct nla_policy ethnl_preempt_get_policy[ETHTOOL_A_PREEMPT_MAX + 1];
>> +extern const struct nla_policy ethnl_preempt_set_policy[ETHTOOL_A_PREEMPT_MAX + 1];
>
> Let's make the size
>
> ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT + 1
>
> for set, and
>
> ETHTOOL_A_PREEMPT_HEADER + 1
>
> for get, like the other tables
>
>> +const struct nla_policy
>> +ethnl_preempt_get_policy[ETHTOOL_A_PREEMPT_MAX + 1] = {
>> +	[ETHTOOL_A_PREEMPT_UNSPEC]		= { .type = NLA_REJECT },
>
> Unnecessary, NLA_REJECT is 0.
>
>> +	[ETHTOOL_A_PREEMPT_HEADER]		= { .type = NLA_NESTED },
>
> Please specify nested policy
>
>> +	[ETHTOOL_A_PREEMPT_ENABLED]		= { .type = NLA_REJECT },
>> +	[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT]	= { .type = NLA_REJECT },
>
> Unnecessary
>
>> +const struct nla_policy
>> +ethnl_preempt_set_policy[ETHTOOL_A_PREEMPT_MAX + 1] = {
>> +	[ETHTOOL_A_PREEMPT_UNSPEC]			= { .type = NLA_REJECT },
>> +	[ETHTOOL_A_PREEMPT_HEADER]			= { .type = NLA_NESTED },
>> +	[ETHTOOL_A_PREEMPT_ENABLED]			= { .type = NLA_U8 },
>
> Set the right netlink policy to check the value is <= 1.
>
>> +	[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT]		= { .type = NLA_U8 },
>> +};


Will fix these netlink validation issues.


Cheers,
Jakub Kicinski Dec. 7, 2020, 11:21 p.m. UTC | #4
On Mon, 07 Dec 2020 14:11:48 -0800 Vinicius Costa Gomes wrote:
> Jakub Kicinski <kuba@kernel.org> writes:
> >> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
> >> + * expressed in terms of X in '(1 + X)*64 + 4'  
> >
> > Is this way of expressing the min frag size from the standard?
> >  
> 
> The standard has this: "A 2-bit integer value indicating, in units of 64
> octets, the minimum number of octets over 64 octets required in
> non-final fragments by the receiver" from IEEE 802.3br-2016, Table
> 79-7a.

Thanks! Let's drop the _mult suffix and add a mention of this
controlling the addFragSize variable from the standard. Perhaps 
it should in fact be called add_frag_size (with an explanation 
that the "additional" means "above the 64B" which are required in
Ethernet, and which are accounted for by the "1" in the 1 + X formula)?
Vinicius Costa Gomes Dec. 8, 2020, 12:24 a.m. UTC | #5
Jakub Kicinski <kuba@kernel.org> writes:

> On Mon, 07 Dec 2020 14:11:48 -0800 Vinicius Costa Gomes wrote:
>> Jakub Kicinski <kuba@kernel.org> writes:
>> >> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
>> >> + * expressed in terms of X in '(1 + X)*64 + 4'  
>> >
>> > Is this way of expressing the min frag size from the standard?
>> >  
>> 
>> The standard has this: "A 2-bit integer value indicating, in units of 64
>> octets, the minimum number of octets over 64 octets required in
>> non-final fragments by the receiver" from IEEE 802.3br-2016, Table
>> 79-7a.
>
> Thanks! Let's drop the _mult suffix and add a mention of this
> controlling the addFragSize variable from the standard. Perhaps 
> it should in fact be called add_frag_size (with an explanation 
> that the "additional" means "above the 64B" which are required in
> Ethernet, and which are accounted for by the "1" in the 1 + X
> formula)?

Sounds good :-) Will add a comment with the standard reference and
change the name to 'add_frag_size'.


Cheers,
Vladimir Oltean Dec. 8, 2020, 12:27 a.m. UTC | #6
On Mon, Dec 07, 2020 at 04:24:02PM -0800, Vinicius Costa Gomes wrote:
> Jakub Kicinski <kuba@kernel.org> writes:
>
> > On Mon, 07 Dec 2020 14:11:48 -0800 Vinicius Costa Gomes wrote:
> >> Jakub Kicinski <kuba@kernel.org> writes:
> >> >> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
> >> >> + * expressed in terms of X in '(1 + X)*64 + 4'
> >> >
> >> > Is this way of expressing the min frag size from the standard?
> >> >
> >>
> >> The standard has this: "A 2-bit integer value indicating, in units of 64
> >> octets, the minimum number of octets over 64 octets required in
> >> non-final fragments by the receiver" from IEEE 802.3br-2016, Table
> >> 79-7a.
> >
> > Thanks! Let's drop the _mult suffix and add a mention of this
> > controlling the addFragSize variable from the standard. Perhaps
> > it should in fact be called add_frag_size (with an explanation
> > that the "additional" means "above the 64B" which are required in
> > Ethernet, and which are accounted for by the "1" in the 1 + X
> > formula)?
>
> Sounds good :-) Will add a comment with the standard reference and
> change the name to 'add_frag_size'.

I think you should be making references to the IEEE 802.3-2018, that
will age better, and a lot more people have that handy.
I believe the go-to definition for the additional fragment size can be
found in clause 30.12.2.1.37 aLldpXdot3LocAddFragSize.
Jakub Kicinski Dec. 8, 2020, 12:48 a.m. UTC | #7
On Tue, 8 Dec 2020 00:27:31 +0000 Vladimir Oltean wrote:
> On Mon, Dec 07, 2020 at 04:24:02PM -0800, Vinicius Costa Gomes wrote:
> > Jakub Kicinski <kuba@kernel.org> writes:
> >  
> > > On Mon, 07 Dec 2020 14:11:48 -0800 Vinicius Costa Gomes wrote:  
> > >> Jakub Kicinski <kuba@kernel.org> writes:  
> > >> >> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
> > >> >> + * expressed in terms of X in '(1 + X)*64 + 4'  
> > >> >
> > >> > Is this way of expressing the min frag size from the standard?
> > >> >  
> > >>
> > >> The standard has this: "A 2-bit integer value indicating, in units of 64
> > >> octets, the minimum number of octets over 64 octets required in
> > >> non-final fragments by the receiver" from IEEE 802.3br-2016, Table
> > >> 79-7a.  
> > >
> > > Thanks! Let's drop the _mult suffix and add a mention of this
> > > controlling the addFragSize variable from the standard. Perhaps
> > > it should in fact be called add_frag_size (with an explanation
> > > that the "additional" means "above the 64B" which are required in
> > > Ethernet, and which are accounted for by the "1" in the 1 + X
> > > formula)?  
> >
> > Sounds good :-) Will add a comment with the standard reference and
> > change the name to 'add_frag_size'.  
> 
> I think you should be making references to the IEEE 802.3-2018, that
> will age better, and a lot more people have that handy.
> I believe the go-to definition for the additional fragment size can be
> found in clause 30.12.2.1.37 aLldpXdot3LocAddFragSize.

That's the LLDP incarnation of it. The variable is defined in:

99.4.7.3 Variables

Probably better to mention 30.14.1.7 aMACMergeAddFragSize if we want a MIB
reference.
Michal Kubecek Dec. 8, 2020, 6:22 a.m. UTC | #8
On Mon, Dec 07, 2020 at 02:11:48PM -0800, Vinicius Costa Gomes wrote:
> Jakub Kicinski <kuba@kernel.org> writes:
> >> + * @min_frag_size_mult: Minimum size for all non-final fragment size,
> >> + * expressed in terms of X in '(1 + X)*64 + 4'
> >
> > Is this way of expressing the min frag size from the standard?
> >
> 
> The standard has this: "A 2-bit integer value indicating, in units of 64
> octets, the minimum number of octets over 64 octets required in
> non-final fragments by the receiver" from IEEE 802.3br-2016, Table
> 79-7a.

Can we be sure that a newer version of the standard cannot change this,
e.g. by introducing a finer granularity? Perhaps it would be safer to express
the size in bytes in the userspace API and translate to this internal
representation in common ethtool code.

Also, please don't forget to update Documentation/networking/ethtool-netlink.rst

Michal
diff mbox series

Patch

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e3da25b51ae4..16d6ee29a6ac 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -263,6 +263,19 @@  struct ethtool_pause_stats {
 	u64 rx_pause_frames;
 };
 
+/**
+ * struct ethtool_fp - Frame Preemption information
+ *
+ * @enabled: Enable frame preemption.
+ *
+ * @min_frag_size_mult: Minimum size for all non-final fragment size,
+ * expressed in terms of X in '(1 + X)*64 + 4'
+ */
+struct ethtool_fp {
+	u8 enabled;
+	u8 min_frag_size_mult;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @supported_coalesce_params: supported types of interrupt coalescing.
@@ -406,6 +419,8 @@  struct ethtool_pause_stats {
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
  *	This is only useful if the device maintains PHY statistics and
  *	cannot use the standard PHY library helpers.
+ * @get_preempt: Get the network device Frame Preemption parameters.
+ * @set_preempt: Set the network device Frame Preemption parameters.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -504,6 +519,10 @@  struct ethtool_ops {
 				      struct ethtool_fecparam *);
 	int	(*set_fecparam)(struct net_device *,
 				      struct ethtool_fecparam *);
+	int	(*get_preempt)(struct net_device *,
+			       struct ethtool_fp *);
+	int	(*set_preempt)(struct net_device *,
+			       struct ethtool_fp *);
 	void	(*get_ethtool_phy_stats)(struct net_device *,
 					 struct ethtool_stats *, u64 *);
 	int	(*get_phy_tunable)(struct net_device *,
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index e2bf36e6964b..0b3dc0c263a9 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -42,6 +42,8 @@  enum {
 	ETHTOOL_MSG_CABLE_TEST_ACT,
 	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
 	ETHTOOL_MSG_TUNNEL_INFO_GET,
+	ETHTOOL_MSG_PREEMPT_GET,
+	ETHTOOL_MSG_PREEMPT_SET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -80,6 +82,8 @@  enum {
 	ETHTOOL_MSG_CABLE_TEST_NTF,
 	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
 	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+	ETHTOOL_MSG_PREEMPT_GET_REPLY,
+	ETHTOOL_MSG_PREEMPT_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -628,6 +632,19 @@  enum {
 	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
 };
 
+/* FRAME PREEMPTION */
+
+enum {
+	ETHTOOL_A_PREEMPT_UNSPEC,
+	ETHTOOL_A_PREEMPT_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_PREEMPT_ENABLED,			/* u8 */
+	ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_PREEMPT_CNT,
+	ETHTOOL_A_PREEMPT_MAX = (__ETHTOOL_A_PREEMPT_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 7a849ff22dad..4e584903e3ef 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@  obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o
+		   tunnels.o preempt.o
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 50d3c8896f91..bc7d66e3ba38 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -245,6 +245,7 @@  ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_PAUSE_GET]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
+	[ETHTOOL_MSG_PREEMPT_GET]	= &ethnl_preempt_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -551,6 +552,7 @@  ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
 	[ETHTOOL_MSG_COALESCE_NTF]	= &ethnl_coalesce_request_ops,
 	[ETHTOOL_MSG_PAUSE_NTF]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_NTF]		= &ethnl_eee_request_ops,
+	[ETHTOOL_MSG_PREEMPT_NTF]	= &ethnl_preempt_request_ops,
 };
 
 /* default notification handler */
@@ -643,6 +645,7 @@  static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
 	[ETHTOOL_MSG_COALESCE_NTF]	= ethnl_default_notify,
 	[ETHTOOL_MSG_PAUSE_NTF]		= ethnl_default_notify,
 	[ETHTOOL_MSG_EEE_NTF]		= ethnl_default_notify,
+	[ETHTOOL_MSG_PREEMPT_NTF]	= ethnl_default_notify,
 };
 
 void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
@@ -912,6 +915,22 @@  static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_tunnel_info_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_PREEMPT_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_preempt_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_preempt_get_policy) - 1,
+	},
+	{
+		.cmd	= ETHTOOL_MSG_PREEMPT_SET,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_set_preempt,
+		.policy = ethnl_preempt_set_policy,
+		.maxattr = ARRAY_SIZE(ethnl_preempt_set_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index d8efec516d86..8f65e53ccd59 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -344,6 +344,7 @@  extern const struct ethnl_request_ops ethnl_coalesce_request_ops;
 extern const struct ethnl_request_ops ethnl_pause_request_ops;
 extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_preempt_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -375,6 +376,8 @@  extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +
 extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
+extern const struct nla_policy ethnl_preempt_get_policy[ETHTOOL_A_PREEMPT_MAX + 1];
+extern const struct nla_policy ethnl_preempt_set_policy[ETHTOOL_A_PREEMPT_MAX + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
@@ -392,5 +395,6 @@  int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_start(struct netlink_callback *cb);
 int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_set_preempt(struct sk_buff *skb, struct genl_info *info);
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/preempt.c b/net/ethtool/preempt.c
new file mode 100644
index 000000000000..4d97d1180a65
--- /dev/null
+++ b/net/ethtool/preempt.c
@@ -0,0 +1,151 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+struct preempt_req_info {
+	struct ethnl_req_info		base;
+};
+
+struct preempt_reply_data {
+	struct ethnl_reply_data		base;
+	struct ethtool_fp		fp;
+};
+
+#define PREEMPT_REPDATA(__reply_base) \
+	container_of(__reply_base, struct preempt_reply_data, base)
+
+const struct nla_policy
+ethnl_preempt_get_policy[ETHTOOL_A_PREEMPT_MAX + 1] = {
+	[ETHTOOL_A_PREEMPT_UNSPEC]		= { .type = NLA_REJECT },
+	[ETHTOOL_A_PREEMPT_HEADER]		= { .type = NLA_NESTED },
+	[ETHTOOL_A_PREEMPT_ENABLED]		= { .type = NLA_REJECT },
+	[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT]	= { .type = NLA_REJECT },
+};
+
+static int preempt_prepare_data(const struct ethnl_req_info *req_base,
+				struct ethnl_reply_data *reply_base,
+				struct genl_info *info)
+{
+	struct preempt_reply_data *data = PREEMPT_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	if (!dev->ethtool_ops->get_preempt)
+		return -EOPNOTSUPP;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = dev->ethtool_ops->get_preempt(dev, &data->fp);
+	ethnl_ops_complete(dev);
+
+	return ret;
+}
+
+static int preempt_reply_size(const struct ethnl_req_info *req_base,
+			      const struct ethnl_reply_data *reply_base)
+{
+	int len = 0;
+
+	len += nla_total_size(sizeof(u8)); /* _PREEMPT_ENABLED */
+	len += nla_total_size(sizeof(u8)); /* _PREEMPT_MIN_FRAG_SIZE */
+
+	return len;
+}
+
+static int preempt_fill_reply(struct sk_buff *skb,
+			      const struct ethnl_req_info *req_base,
+			      const struct ethnl_reply_data *reply_base)
+{
+	const struct preempt_reply_data *data = PREEMPT_REPDATA(reply_base);
+	const struct ethtool_fp *preempt = &data->fp;
+
+	if (nla_put_u8(skb, ETHTOOL_A_PREEMPT_ENABLED, preempt->enabled))
+		return -EMSGSIZE;
+
+	if (nla_put_u8(skb, ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT,
+		       preempt->min_frag_size_mult))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+const struct ethnl_request_ops ethnl_preempt_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_PREEMPT_GET,
+	.reply_cmd		= ETHTOOL_MSG_PREEMPT_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_PREEMPT_HEADER,
+	.req_info_size		= sizeof(struct preempt_req_info),
+	.reply_data_size	= sizeof(struct preempt_reply_data),
+
+	.prepare_data		= preempt_prepare_data,
+	.reply_size		= preempt_reply_size,
+	.fill_reply		= preempt_fill_reply,
+};
+
+const struct nla_policy
+ethnl_preempt_set_policy[ETHTOOL_A_PREEMPT_MAX + 1] = {
+	[ETHTOOL_A_PREEMPT_UNSPEC]			= { .type = NLA_REJECT },
+	[ETHTOOL_A_PREEMPT_HEADER]			= { .type = NLA_NESTED },
+	[ETHTOOL_A_PREEMPT_ENABLED]			= { .type = NLA_U8 },
+	[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT]		= { .type = NLA_U8 },
+};
+
+int ethnl_set_preempt(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ethnl_req_info req_info = {};
+	struct nlattr **tb = info->attrs;
+	struct ethtool_fp preempt = {};
+	struct net_device *dev;
+	bool mod = false;
+	int ret;
+
+	ret = ethnl_parse_header_dev_get(&req_info,
+					 tb[ETHTOOL_A_PREEMPT_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+	ret = -EOPNOTSUPP;
+	if (!dev->ethtool_ops->get_preempt ||
+	    !dev->ethtool_ops->set_preempt)
+		goto out_dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = dev->ethtool_ops->get_preempt(dev, &preempt);
+	if (ret < 0) {
+		GENL_SET_ERR_MSG(info, "failed to retrieve frame preemption settings");
+		goto out_ops;
+	}
+
+	ethnl_update_u8(&preempt.enabled,
+			tb[ETHTOOL_A_PREEMPT_ENABLED], &mod);
+	ethnl_update_u8(&preempt.min_frag_size_mult,
+			tb[ETHTOOL_A_PREEMPT_MIN_FRAG_SIZE_MULT], &mod);
+
+	ret = 0;
+	if (!mod)
+		goto out_ops;
+
+	ret = dev->ethtool_ops->set_preempt(dev, &preempt);
+	if (ret < 0) {
+		GENL_SET_ERR_MSG(info, "frame preemption settings update failed");
+		goto out_ops;
+	}
+
+	ethtool_notify(dev, ETHTOOL_MSG_PREEMPT_NTF, NULL);
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev:
+	dev_put(dev);
+	return ret;
+}