Patchwork [2/3] Add support for RX packet classification in a network device

login
register
mail settings
Submitter Santwona.Behera@Sun.COM
Date Dec. 22, 2008, 6:45 p.m.
Message ID <494FE060.8020600@Sun.COM>
Download mbox | patch
Permalink /patch/15264/
State RFC
Delegated to: David Miller
Headers show

Comments

Santwona.Behera@Sun.COM - Dec. 22, 2008, 6:45 p.m.

Ben Hutchings - Dec. 22, 2008, 7:27 p.m.
On Mon, 2008-12-22 at 10:45 -0800, Santwona.Behera@Sun.COM wrote:
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index b4b038b..d3289b0 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -55,12 +55,13 @@ struct ethtool_drvinfo {
>         char    bus_info[ETHTOOL_BUSINFO_LEN];  /* Bus info for this IF. */
>                                 /* For PCI devices, use pci_name(pci_dev). */
>         char    reserved1[32];
> -       char    reserved2[12];
> +       char    reserved2[8];
>         __u32   n_priv_flags;   /* number of flags valid in ETHTOOL_GPFLAGS */
>         __u32   n_stats;        /* number of u64's from ETHTOOL_GSTATS */
>         __u32   testinfo_len;
>         __u32   eedump_len;     /* Size of data from ETHTOOL_GEEPROM (bytes) */
>         __u32   regdump_len;    /* Size of data from ETHTOOL_GREGS (bytes) */
> +       __u32   n_rx_rules;     /* number of rx classification rules */
>  };

This shifts all the fields between n_priv_flags and regdump_len
inclusive.  What is the point of reserving space in the structure if we
then go and move fields around elsewhere?

Also, why do you think n_rx_rules is driver or hardware information?
The maximum number of RX filters is not necessarily a static property.
Consider hardware that has separate limited-size sets of layer-2 and
layer-3 filters, or that has a single set but needs more storage for
some types of filters.

The important value is the current number of rules which is dynamic and
does not belong here.

[...]
> @@ -558,14 +626,16 @@ struct ethtool_ops {
>  #define        TCP_V4_FLOW     0x01
>  #define        UDP_V4_FLOW     0x02
>  #define        SCTP_V4_FLOW    0x03
> -#define        AH_ESP_V4_FLOW  0x04
> -#define        TCP_V6_FLOW     0x05
> -#define        UDP_V6_FLOW     0x06
> -#define        SCTP_V6_FLOW    0x07
> -#define        AH_ESP_V6_FLOW  0x08
> +#define        AH_V4_FLOW      0x04
> +#define        ESP_V4_FLOW     0x05
> +#define        TCP_V6_FLOW     0x06
> +#define        UDP_V6_FLOW     0x07
> +#define        SCTP_V6_FLOW    0x08
> +#define        AH_V6_FLOW      0x09
> +#define        ESP_V6_FLOW     0x0a
> +#define        IP_USER_FLOW    0x0b
>  
>  /* L3-L4 network traffic flow hash options */
> -#define        RXH_DEV_PORT    (1 << 0)
>  #define        RXH_L2DA        (1 << 1)
>  #define        RXH_VLAN        (1 << 2)
>  #define        RXH_L3_PROTO    (1 << 3)
[...]

No, you can't do this.  Leave the existing definitions unchanged and
only add new ones.

Ben.
Santwona.Behera@Sun.COM - Dec. 22, 2008, 11:04 p.m.
On 12/22/08 11:27 AM, Ben Hutchings wrote:
> On Mon, 2008-12-22 at 10:45 -0800, Santwona.Behera@Sun.COM wrote:
>> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
>> index b4b038b..d3289b0 100644
>> --- a/include/linux/ethtool.h
>> +++ b/include/linux/ethtool.h
>> @@ -55,12 +55,13 @@ struct ethtool_drvinfo {
>>         char    bus_info[ETHTOOL_BUSINFO_LEN];  /* Bus info for this IF. */
>>                                 /* For PCI devices, use pci_name(pci_dev). */
>>         char    reserved1[32];
>> -       char    reserved2[12];
>> +       char    reserved2[8];
>>         __u32   n_priv_flags;   /* number of flags valid in ETHTOOL_GPFLAGS */
>>         __u32   n_stats;        /* number of u64's from ETHTOOL_GSTATS */
>>         __u32   testinfo_len;
>>         __u32   eedump_len;     /* Size of data from ETHTOOL_GEEPROM (bytes) */
>>         __u32   regdump_len;    /* Size of data from ETHTOOL_GREGS (bytes) */
>> +       __u32   n_rx_rules;     /* number of rx classification rules */
>>  };
> 
> This shifts all the fields between n_priv_flags and regdump_len
> inclusive.  What is the point of reserving space in the structure if we
> then go and move fields around elsewhere?
> 
> Also, why do you think n_rx_rules is driver or hardware information?
> The maximum number of RX filters is not necessarily a static property.
> Consider hardware that has separate limited-size sets of layer-2 and
> layer-3 filters, or that has a single set but needs more storage for
> some types of filters.
> 
> The important value is the current number of rules which is dynamic and
> does not belong here.
> 
> [...]


OK, I will move this to the ethtool_rxnfc struct.

>> @@ -558,14 +626,16 @@ struct ethtool_ops {
>>  #define        TCP_V4_FLOW     0x01
>>  #define        UDP_V4_FLOW     0x02
>>  #define        SCTP_V4_FLOW    0x03
>> -#define        AH_ESP_V4_FLOW  0x04
>> -#define        TCP_V6_FLOW     0x05
>> -#define        UDP_V6_FLOW     0x06
>> -#define        SCTP_V6_FLOW    0x07
>> -#define        AH_ESP_V6_FLOW  0x08
>> +#define        AH_V4_FLOW      0x04
>> +#define        ESP_V4_FLOW     0x05
>> +#define        TCP_V6_FLOW     0x06
>> +#define        UDP_V6_FLOW     0x07
>> +#define        SCTP_V6_FLOW    0x08
>> +#define        AH_V6_FLOW      0x09
>> +#define        ESP_V6_FLOW     0x0a
>> +#define        IP_USER_FLOW    0x0b
>>  
>>  /* L3-L4 network traffic flow hash options */
>> -#define        RXH_DEV_PORT    (1 << 0)
>>  #define        RXH_L2DA        (1 << 1)
>>  #define        RXH_VLAN        (1 << 2)
>>  #define        RXH_L3_PROTO    (1 << 3)
> [...]
> 
> No, you can't do this.  Leave the existing definitions unchanged and
> only add new ones.

The original code/patch was not quite correct where the AH_ESP_V4_FLOW 
was being used to represent AH flows. So my goal here was to remove that 
and add 2 separate flow types for AH and ESP. I have two ways of 
achieving this without changing the existing definitions completely:

1. I change AH_ESP_Vx_FLOW defines to AH_Vx_FLOW defines and add 2 new 
defines for ESP_Vx_FLOW at the end, with values 0x9 and 0xa.

2. I keep the AH_ESP_Vx_FLOW defines as is (but this will be dead code 
as it will not be used) and add 2 new AH_Vx_FLOW defines and 2 new 
ESP_Vx_FLOW defines at the end with values 0x9, 0xa, 0xb, 0xc.

Please let me know which one is more desirable.

rgds,
--santwona
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings - Dec. 23, 2008, 12:16 a.m.
On Mon, 2008-12-22 at 15:04 -0800, Santwona.Behera@Sun.COM wrote:
[...]
> >> @@ -558,14 +626,16 @@ struct ethtool_ops {
> >>  #define        TCP_V4_FLOW     0x01
> >>  #define        UDP_V4_FLOW     0x02
> >>  #define        SCTP_V4_FLOW    0x03
> >> -#define        AH_ESP_V4_FLOW  0x04
> >> -#define        TCP_V6_FLOW     0x05
> >> -#define        UDP_V6_FLOW     0x06
> >> -#define        SCTP_V6_FLOW    0x07
> >> -#define        AH_ESP_V6_FLOW  0x08
> >> +#define        AH_V4_FLOW      0x04
> >> +#define        ESP_V4_FLOW     0x05
> >> +#define        TCP_V6_FLOW     0x06
> >> +#define        UDP_V6_FLOW     0x07
> >> +#define        SCTP_V6_FLOW    0x08
> >> +#define        AH_V6_FLOW      0x09
> >> +#define        ESP_V6_FLOW     0x0a
> >> +#define        IP_USER_FLOW    0x0b
> >>  
> >>  /* L3-L4 network traffic flow hash options */
> >> -#define        RXH_DEV_PORT    (1 << 0)
> >>  #define        RXH_L2DA        (1 << 1)
> >>  #define        RXH_VLAN        (1 << 2)
> >>  #define        RXH_L3_PROTO    (1 << 3)
> > [...]
> > 
> > No, you can't do this.  Leave the existing definitions unchanged and
> > only add new ones.
> 
> The original code/patch was not quite correct where the AH_ESP_V4_FLOW 
> was being used to represent AH flows. So my goal here was to remove that 
> and add 2 separate flow types for AH and ESP. I have two ways of 
> achieving this without changing the existing definitions completely:
> 
> 1. I change AH_ESP_Vx_FLOW defines to AH_Vx_FLOW defines and add 2 new 
> defines for ESP_Vx_FLOW at the end, with values 0x9 and 0xa.

If AH_ESP_Vx_FLOW has only ever been implemented as AH-only then perhaps
this is reasonable.

> 2. I keep the AH_ESP_Vx_FLOW defines as is (but this will be dead code 
> as it will not be used) and add 2 new AH_Vx_FLOW defines and 2 new 
> ESP_Vx_FLOW defines at the end with values 0x9, 0xa, 0xb, 0xc.

This is safest.

Also you are probably right to remove RXH_DEV_PORT as that appears to be
an niu quirk that shouldn't be exposed.

Ben.
Santwona.Behera@Sun.COM - Dec. 23, 2008, 12:36 a.m.
>>
>> 1. I change AH_ESP_Vx_FLOW defines to AH_Vx_FLOW defines and add 2 new 
>> defines for ESP_Vx_FLOW at the end, with values 0x9 and 0xa.
> 
> If AH_ESP_Vx_FLOW has only ever been implemented as AH-only then perhaps
> this is reasonable.
> 
>> 2. I keep the AH_ESP_Vx_FLOW defines as is (but this will be dead code 
>> as it will not be used) and add 2 new AH_Vx_FLOW defines and 2 new 
>> ESP_Vx_FLOW defines at the end with values 0x9, 0xa, 0xb, 0xc.
> 
> This is safest.

OK, I will re-implement it this way then.

> 
> Also you are probably right to remove RXH_DEV_PORT as that appears to be
> an niu quirk that shouldn't be exposed.

That was the reasoning behind removing RXH_DEV_PORT.

rgds,
--santwona
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

From: Santwona Behera <santwona.behera@sun.com>
Subject: [PATCH 2/3] [Ethtool] Add RX pkt classification interface

Signed-off-by: Santwona Behera <santwona.behera@sun.com>
---
 include/linux/ethtool.h |   95 +++++++++++++++++++++++++++++++++++++++++------
 net/core/ethtool.c      |   60 ++++++++++++++++++++++++-----
 2 files changed, 133 insertions(+), 22 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b4b038b..d3289b0 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -55,12 +55,13 @@  struct ethtool_drvinfo {
 	char	bus_info[ETHTOOL_BUSINFO_LEN];	/* Bus info for this IF. */
 				/* For PCI devices, use pci_name(pci_dev). */
 	char	reserved1[32];
-	char	reserved2[12];
+	char	reserved2[8];
 	__u32	n_priv_flags;	/* number of flags valid in ETHTOOL_GPFLAGS */
 	__u32	n_stats;	/* number of u64's from ETHTOOL_GSTATS */
 	__u32	testinfo_len;
 	__u32	eedump_len;	/* Size of data from ETHTOOL_GEEPROM (bytes) */
 	__u32	regdump_len;	/* Size of data from ETHTOOL_GREGS (bytes) */
+	__u32	n_rx_rules;	/* number of rx classification rules */
 };
 
 #define SOPASS_MAX	6
@@ -287,10 +288,71 @@  enum ethtool_flags {
 	ETH_FLAG_LRO		= (1 << 15),	/* LRO is enabled */
 };
 
-struct ethtool_rxnfc {
-	__u32		cmd;
+struct ethtool_tcpip4_spec {
+	__u32	ip4src;
+	__u32	ip4dst;
+	__u16	psrc;
+	__u16	pdst;
+	__u8    tos;
+};
+
+struct ethtool_ah_espip4_spec {
+	__u32	ip4src;
+	__u32	ip4dst;
+	__u32	spi;
+	__u8    tos;
+};
+
+struct ethtool_rawip4_spec {
+	__u32	ip4src;
+	__u32	ip4dst;
+	__u8	hdata[64];
+};
+
+struct ethtool_ether_spec {
+	__u16	ether_type;
+	__u8	frame_size;
+	__u8	eframe[16];
+};
+
+#define	ETH_RX_NFC_IP4	1
+#define	ETH_RX_NFC_IP6	2
+
+struct ethtool_usrip4_spec {
+	__u32	ip4src;
+	__u32	ip4dst;
+	__u32	l4_4_bytes;
+	__u8    tos;
+	__u8    ip_ver;
+	__u8    proto;
+};
+
+struct ethtool_rx_flow_spec {
 	__u32		flow_type;
-	__u64		data;
+	union
+	{
+		struct ethtool_tcpip4_spec		tcp_ip4_spec;
+		struct ethtool_tcpip4_spec		udp_ip4_spec;
+		struct ethtool_tcpip4_spec		sctp_ip4_spec;
+		struct ethtool_ah_espip4_spec		ah_ip4_spec;
+		struct ethtool_ah_espip4_spec		esp_ip4_spec;
+		struct ethtool_rawip4_spec		raw_ip4_spec;
+		struct ethtool_ether_spec		ether_spec;
+		struct ethtool_usrip4_spec		usr_ip4_spec;
+		__u8					hdata[64];
+	} h_u, m_u; /* entry, mask */
+	__u64		ring_cookie;
+	__u32		location;
+};
+
+struct ethtool_rxnfc {
+	__u32				cmd;
+	__u32				flow_type;
+	/* The rx flow hash value or the rule DB size */
+	__u64				data;
+	struct ethtool_rx_flow_spec	fs;
+	__u32				rule_cnt;
+	__u32				rule_locs[0];
 };
 
 #ifdef __KERNEL__
@@ -417,8 +479,9 @@  struct ethtool_ops {
 	/* the following hooks are obsolete */
 	int	(*self_test_count)(struct net_device *);/* use get_sset_count */
 	int	(*get_stats_count)(struct net_device *);/* use get_sset_count */
-	int	(*get_rxhash)(struct net_device *, struct ethtool_rxnfc *);
-	int	(*set_rxhash)(struct net_device *, struct ethtool_rxnfc *);
+	int	(*get_rxrule_cnt)(struct net_device *);
+	int	(*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *);
+	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 };
 #endif /* __KERNEL__ */
 
@@ -467,6 +530,11 @@  struct ethtool_ops {
 
 #define	ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
 #define	ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
+#define	ETHTOOL_GRXRINGS	0x0000002b /* Get RX rings available for LB */
+#define	ETHTOOL_GRXCLSRULE	0x0000002c /* Get RX classification rule */
+#define	ETHTOOL_GRXCLSRLALL	0x0000002d /* Get all RX classification rule */
+#define	ETHTOOL_SRXCLSRLDEL	0x0000002e /* Delete RX classification rule */
+#define	ETHTOOL_SRXCLSRLINS	0x0000002f /* Insert RX classification rule */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -558,14 +626,16 @@  struct ethtool_ops {
 #define	TCP_V4_FLOW	0x01
 #define	UDP_V4_FLOW	0x02
 #define	SCTP_V4_FLOW	0x03
-#define	AH_ESP_V4_FLOW	0x04
-#define	TCP_V6_FLOW	0x05
-#define	UDP_V6_FLOW	0x06
-#define	SCTP_V6_FLOW	0x07
-#define	AH_ESP_V6_FLOW	0x08
+#define	AH_V4_FLOW	0x04
+#define	ESP_V4_FLOW	0x05
+#define	TCP_V6_FLOW	0x06
+#define	UDP_V6_FLOW	0x07
+#define	SCTP_V6_FLOW	0x08
+#define	AH_V6_FLOW	0x09
+#define	ESP_V6_FLOW	0x0a
+#define	IP_USER_FLOW	0x0b
 
 /* L3-L4 network traffic flow hash options */
-#define	RXH_DEV_PORT	(1 << 0)
 #define	RXH_L2DA	(1 << 1)
 #define	RXH_VLAN	(1 << 2)
 #define	RXH_L3_PROTO	(1 << 3)
@@ -575,5 +645,6 @@  struct ethtool_ops {
 #define	RXH_L4_B_2_3	(1 << 7) /* dst port in case of TCP/UDP/SCTP */
 #define	RXH_DISCARD	(1 << 31)
 
+#define	RX_CLS_FLOW_DISC	0xffffffffffffffffULL
 
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 14ada53..46b1259 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -203,40 +203,72 @@  static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 		info.regdump_len = ops->get_regs_len(dev);
 	if (ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
+	if (ops->get_rxrule_cnt)
+		info.n_rx_rules = ops->get_rxrule_cnt(dev);
 
 	if (copy_to_user(useraddr, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
 }
 
-static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rxnfc cmd;
 
-	if (!dev->ethtool_ops->set_rxhash)
+	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxhash(dev, &cmd);
+	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
 }
 
-static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret, rule_cnt;
+	void *rule_buf = NULL;
 
-	if (!dev->ethtool_ops->get_rxhash)
+	if (!ops->get_rxnfc || !ops->get_rxrule_cnt)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&info, useraddr, sizeof(info)))
 		return -EFAULT;
 
-	dev->ethtool_ops->get_rxhash(dev, &info);
+	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
+		rule_cnt = ops->get_rxrule_cnt(dev);
+		if (info.rule_cnt > rule_cnt)
+			info.rule_cnt = rule_cnt;
+		if (rule_cnt > 0) {
+			rule_buf = kmalloc(rule_cnt * sizeof(u32), GFP_USER);
+			if (!rule_buf)
+				return -ENOMEM;
+		}
+	}
+
+	ret = ops->get_rxnfc(dev, &info, rule_buf);
+	if (ret < 0)
+		goto err_out;
 
+	ret = -EFAULT;
 	if (copy_to_user(useraddr, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
+		goto err_out;
+
+	if (rule_buf) {
+		useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+		if (copy_to_user(useraddr, rule_buf,
+				 info.rule_cnt * sizeof(u32)))
+			goto err_out;
+	}
+	ret = 0;
+
+err_out:
+	if (rule_buf)
+		kfree(rule_buf);
+
+	return ret;
 }
 
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -857,6 +889,9 @@  int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GFLAGS:
 	case ETHTOOL_GPFLAGS:
 	case ETHTOOL_GRXFH:
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -1009,10 +1044,15 @@  int dev_ethtool(struct net *net, struct ifreq *ifr)
 				       dev->ethtool_ops->set_priv_flags);
 		break;
 	case ETHTOOL_GRXFH:
-		rc = ethtool_get_rxhash(dev, useraddr);
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+		rc = ethtool_get_rxnfc(dev, useraddr);
 		break;
 	case ETHTOOL_SRXFH:
-		rc = ethtool_set_rxhash(dev, useraddr);
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		rc = ethtool_set_rxnfc(dev, useraddr);
 		break;
 	default:
 		rc = -EOPNOTSUPP;
-- 
1.6.0.4