diff mbox

[net-next,09/10] net/mlx4_en: Manage flow steering rules with ethtool

Message ID 1341135823-29039-10-git-send-email-ogerlitz@mellanox.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Or Gerlitz July 1, 2012, 9:43 a.m. UTC
From: Hadar Hen Zion <hadarh@mellanox.co.il>

Implement the ethtool APIs for attaching L2/L3/L4 based flow steering
rules to the netdevice RX rings. Added set_rxnfc callback and enhanced
the existing get_rxnfc callback.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c |  373 +++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |    7 +
 2 files changed, 380 insertions(+), 0 deletions(-)

Comments

David Miller July 1, 2012, 10:23 a.m. UTC | #1
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun,  1 Jul 2012 12:43:42 +0300

> +		en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n"
> +		       , cmd->fs.location, rule->id);

Put that first comma at the end of the first line, this is very ugly.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings July 1, 2012, 4 p.m. UTC | #2
On Sun, 2012-07-01 at 12:43 +0300, Or Gerlitz wrote:
> From: Hadar Hen Zion <hadarh@mellanox.co.il>
> 
> Implement the ethtool APIs for attaching L2/L3/L4 based flow steering
> rules to the netdevice RX rings. Added set_rxnfc callback and enhanced
> the existing get_rxnfc callback.
> 
> Signed-off-by: Hadar Hen Zion <hadarh@mellanox.co.il>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/en_ethtool.c |  373 +++++++++++++++++++++++
>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |    7 +
>  2 files changed, 380 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
> index 72901ce..30de264 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
> @@ -38,6 +38,10 @@
>  #include "mlx4_en.h"
>  #include "en_port.h"
>  
> +#define EN_ETHTOOL_QP_ATTACH (1ull << 63)
> +#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL
> +#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
> +#define EN_ETHTOOL_WORD_MASK  cpu_to_be32(0xffffffff)
>  
>  static void
>  mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
> @@ -599,16 +603,360 @@ static int mlx4_en_set_rxfh_indir(struct net_device *dev,
>  	return err;
>  }
>  
> +#define not_all_zeros_or_all_ones(field, type) \
> +	(field && (type)~field)
> +
> +static int mlx4_en_validate_flow(struct net_device *dev,
> +				 struct ethtool_rxnfc *cmd)
> +{
> +	struct ethtool_usrip4_spec *l3_mask;
> +	struct ethtool_tcpip4_spec *l4_mask;
> +	struct ethhdr *eth_mask;
> +	u64 full_mac = ~0ull;
> +	u64 zero_mac = 0;
> +
> +	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
> +		return -EINVAL;
> +
> +	switch (cmd->fs.flow_type & ~FLOW_EXT) {
> +	case TCP_V4_FLOW:
> +	case UDP_V4_FLOW:
> +		if (cmd->fs.h_u.tcp_ip4_spec.tos)
> +			return -EOPNOTSUPP;

I suspect that your filter ignores TOS, rather than only matching TOS ==
0, so you should actually be checking the corresponding field in the
mask (fs.m_u).  The error code should be EINVAL.

> +		l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
> +		/* don't allow mask which isn't all 0 or 1 */
> +		if (not_all_zeros_or_all_ones(l4_mask->ip4src, __be32) ||
> +		    not_all_zeros_or_all_ones(l4_mask->ip4dst, __be32) ||
> +		    not_all_zeros_or_all_ones(l4_mask->psrc, __be16) ||
> +		    not_all_zeros_or_all_ones(l4_mask->pdst, __be16))
> +			return -EOPNOTSUPP;

Again, here and in many further instances, the error code should be
EINVAL.

> +		break;
> +	case IP_USER_FLOW:
> +		l3_mask = &cmd->fs.m_u.usr_ip4_spec;
> +		if (cmd->fs.h_u.usr_ip4_spec.l4_4_bytes ||
> +		    cmd->fs.h_u.usr_ip4_spec.tos ||

I think this should be checking l4_4_bytes and tos in the mask.

> +		    cmd->fs.h_u.usr_ip4_spec.proto ||
> +		    cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
> +		    (!cmd->fs.h_u.usr_ip4_spec.ip4src &&
> +		     !cmd->fs.h_u.usr_ip4_spec.ip4dst) ||
> +		    not_all_zeros_or_all_ones(l3_mask->ip4src, __be32) ||
> +		    not_all_zeros_or_all_ones(l3_mask->ip4dst, __be32))
> +			return -EOPNOTSUPP;
> +		break;
> +	case ETHER_FLOW:
> +		eth_mask = &cmd->fs.m_u.ether_spec;
> +		if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN))
> +			return -EOPNOTSUPP;
> +		if (!memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN))
> +			return -EOPNOTSUPP;

But in the next statement you test whether eth_mask->h_dest is either
all-zeroes or all-ones.  Is all-zeroes valid or not?  I suspect you
actually intend to reject the case where both h_dest and h_proto are
masked out.

> +		if (not_all_zeros_or_all_ones(eth_mask->h_proto, __be16) ||
> +		    (memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN) &&
> +		     memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)))
> +			return -EOPNOTSUPP;
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if ((cmd->fs.flow_type & FLOW_EXT)) {
> +		if (cmd->fs.m_ext.vlan_etype ||
> +		    not_all_zeros_or_all_ones(cmd->fs.m_ext.vlan_tci,
> +					       __be16)) {
> +			return -EOPNOTSUPP;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void add_ip_rule(struct mlx4_en_priv *priv,
> +			struct ethtool_rxnfc *cmd,
> +			struct list_head *list_h)
> +{
> +	struct mlx4_spec_list *spec_l3;
> +
> +	spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
> +	if (!spec_l3) {
> +		en_err(priv, "Fail to alloc ethtool rule.\n");
> +		return;
> +	}

This should return an error code as well - logging is not a substitue.

> +	spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
> +	spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
> +	if (spec_l3->ipv4.src_ip)
> +		spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
> +	spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
> +	if (spec_l3->ipv4.dst_ip)
> +		spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;

The conditions should be using the mask (cmd->fs.m_u) not the value.

> +	list_add_tail(&spec_l3->list, list_h);
> +}
> +
> +static void add_tcp_udp_rule(struct mlx4_en_priv *priv,
> +			     struct ethtool_rxnfc *cmd,
> +			     struct list_head *list_h, int proto)
> +{
> +	struct mlx4_spec_list *spec_l3;
> +	struct mlx4_spec_list *spec_l4;
> +
> +	spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
> +	spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL);
> +	if (!spec_l4 || !spec_l3) {
> +		en_err(priv, "Fail to alloc ethtool rule.\n");

If one of them was successfully allocated, it will now be leaked.

> +		return;
> +	}
> +
> +	spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
> +
> +	if (proto == TCP_V4_FLOW) {
> +		spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
> +		spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src;
> +		spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst;
> +		spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc;
> +		spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst;
> +	} else {
> +		spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
> +		spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src;
> +		spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst;
> +		spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc;
> +		spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst;
> +	}
> +
> +	if (spec_l3->ipv4.src_ip)
> +		spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
> +	if (spec_l3->ipv4.dst_ip)
> +		spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
> +
> +	if (spec_l4->tcp_udp.src_port)
> +		spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK;
> +	if (spec_l4->tcp_udp.dst_port)
> +		spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK;

All these conditions should be using the mask, not the value.

> +	list_add_tail(&spec_l3->list, list_h);
> +	list_add_tail(&spec_l4->list, list_h);
> +}
> +
> +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
> +					     struct ethtool_rxnfc *cmd,
> +					     struct list_head *rule_list_h)
> +{
> +	int err;
> +	u64 mac;
> +	__be64 be_mac;
> +	struct ethhdr *eth_spec;
> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	struct mlx4_spec_list *spec_l2;
> +	__be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16);
> +
> +	err = mlx4_en_validate_flow(dev, cmd);
> +	if (err)
> +		return err;
> +
> +	spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
> +	if (!spec_l2)
> +		return -ENOMEM;
> +
> +	mac = priv->mac & EN_ETHTOOL_MAC_MASK;
> +	be_mac = cpu_to_be64(mac << 16);
> +
> +	spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
> +	memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
> +	if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW)
> +		memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN);

Does the hardware require filtering by MAC address and not only by layer
3/4 addresses?

> +	if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) {
> +		spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
> +		spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff);

This doesn't match mlx4_en_validate_flow(); you are replacing a mask of
0xffff with 0xfff.

> +	}
> +
> +	list_add_tail(&spec_l2->list, rule_list_h);
> +
> +	switch (cmd->fs.flow_type & ~FLOW_EXT) {
> +	case ETHER_FLOW:
> +		eth_spec = &cmd->fs.h_u.ether_spec;
> +		memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN);
> +		spec_l2->eth.ether_type = eth_spec->h_proto;
> +		if (eth_spec->h_proto)
> +			spec_l2->eth.ether_type_enable = 1;
> +		break;
> +	case IP_USER_FLOW:
> +		add_ip_rule(priv, cmd, rule_list_h);
> +		break;
> +	case TCP_V4_FLOW:
> +		add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
> +		break;
> +	case UDP_V4_FLOW:
> +		add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
> +		break;

All those functions need to be able to return errors.

> +	}
> +	return 0;
> +}
> +
> +static int mlx4_en_flow_replace(struct net_device *dev,
> +				struct ethtool_rxnfc *cmd)
> +{
> +	int err;
> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	struct ethtool_flow_id *loc_rule;
> +	struct mlx4_spec_list *spec, *tmp_spec;
> +	u32 qpn;
> +	u64 reg_id;
> +
> +	struct mlx4_net_trans_rule rule = {
> +		.queue_mode = MLX4_NET_TRANS_Q_FIFO,
> +		.exclusive = 0,
> +		.allow_loopback = 1,
> +		.promisc_mode = MLX4_FS_PROMISC_NONE,
> +	};
> +
> +	rule.port = priv->port;
> +	rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location;
> +	INIT_LIST_HEAD(&rule.list);
> +
> +	/* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */
> +	if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC)
> +		return -EOPNOTSUPP;

EINVAL.

[...]
>  static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
>  			     u32 *rule_locs)
>  {
>  	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	struct mlx4_en_dev *mdev = priv->mdev;
>  	int err = 0;
> +	int i = 0, priority = 0;
> +
> +	if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
> +		return -EOPNOTSUPP;
>  
>  	switch (cmd->cmd) {
>  	case ETHTOOL_GRXRINGS:
>  		cmd->data = priv->rx_ring_num;
>  		break;
> +	case ETHTOOL_GRXCLSRLCNT:
> +		cmd->rule_cnt = mlx4_en_get_num_flows(priv);
> +		break;
> +	case ETHTOOL_GRXCLSRULE:
> +		err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
> +		break;
> +	case ETHTOOL_GRXCLSRLALL:
> +		while (!err || err == -ENOENT) {
> +			err = mlx4_en_get_flow(dev, cmd, i);
> +			if (!err)
> +				((u32 *)(rule_locs))[priority++] = i;

I don't see any range check against cmd->rule_cnt.

Why are you casting rule_locs?

Also, are the rules really prioritised by location, so that if rule 0
and 1 match a packet then only rule 0 is applied?  If they are actually
prioritised by the match type then you need to assign locations on the
driver side that reflect the actual priorities.

> +			i++;
> +		}
> +		if (priority)
> +			err = 0;
[...]

But if there are no rules defined, this is an error?  That's not right.
I think you should unconditionally set err = 0 here.

Ben.
Joe Perches July 1, 2012, 4:38 p.m. UTC | #3
On Sun, 2012-07-01 at 17:00 +0100, Ben Hutchings wrote:
> On Sun, 2012-07-01 at 12:43 +0300, Or Gerlitz wrote:
> > From: Hadar Hen Zion <hadarh@mellanox.co.il>
[]
> > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
[]
> > @@ -599,16 +603,360 @@ static int mlx4_en_set_rxfh_indir(struct net_device *dev,
> >  	return err;
> >  }
> >  
> > +#define not_all_zeros_or_all_ones(field, type) \
> > +	(field && (type)~field)

I think this macro is suboptimal because
negated names are easy to misuse.

I think type is also unnecessary and too
easy to mismatch or keep up to date with
field type changes.

Perhaps it's better as:

#define all_zeros_or_all_ones(field)		\
({						\
	field && (typeof(field))~field;		\
})

> > +
> > +static int mlx4_en_validate_flow(struct net_device *dev,
> > +				 struct ethtool_rxnfc *cmd)
> > +{
[]
> > +		/* don't allow mask which isn't all 0 or 1 */
> > +		if (not_all_zeros_or_all_ones(l4_mask->ip4src, __be32) ||
> > +		    not_all_zeros_or_all_ones(l4_mask->ip4dst, __be32) ||
> > +		    not_all_zeros_or_all_ones(l4_mask->psrc, __be16) ||
> > +		    not_all_zeros_or_all_ones(l4_mask->pdst, __be16))
> > +			return -EOPNOTSUPP;

		if (!all_zeros_or_all_ones(l4_mask->ip4src) ||
		    !all_zeros_or_all_ones(l4_mask->ip4dst) ||
		    !all_zeros_or_all_ones(l4_mask->psrc) ||
		    !all_zeros_or_all_ones(l4_mask->pdst))


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joe Perches July 1, 2012, 5:31 p.m. UTC | #4
On Sun, 2012-07-01 at 09:38 -0700, Joe Perches wrote:

> I think this macro is suboptimal because
> negated names are easy to misuse.
> 
> I think type is also unnecessary and too
> easy to mismatch or keep up to date with
> field type changes.
> 
> Perhaps it's better as:
> 
> #define all_zeros_or_all_ones(field)		\
> ({						\
> 	field && (typeof(field))~field;		\
> })

Umm, or not.

It helps when I actually test the code not just type
it into an email client.

	!(field && (typeof(field))~field)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Schwab July 1, 2012, 6:48 p.m. UTC | #5
Joe Perches <joe@perches.com> writes:

> On Sun, 2012-07-01 at 09:38 -0700, Joe Perches wrote:
>
>> I think this macro is suboptimal because
>> negated names are easy to misuse.
>> 
>> I think type is also unnecessary and too
>> easy to mismatch or keep up to date with
>> field type changes.
>> 
>> Perhaps it's better as:
>> 
>> #define all_zeros_or_all_ones(field)		\
>> ({						\
>> 	field && (typeof(field))~field;		\
>> })
>
> Umm, or not.
>
> It helps when I actually test the code not just type
> it into an email client.
>
> 	!(field && (typeof(field))~field)

Or write it as (!field || !(typeof(field))~field) which more closely
resembles what the macro name expresses.

Andreas.
Joe Perches July 1, 2012, 7:52 p.m. UTC | #6
On Sun, 2012-07-01 at 20:48 +0200, Andreas Schwab wrote:
> Joe Perches <joe@perches.com> writes:
> > On Sun, 2012-07-01 at 09:38 -0700, Joe Perches wrote:
> >> Perhaps it's better as:
> >> 
> >> #define all_zeros_or_all_ones(field)		\
> >> ({						\
> >> 	field && (typeof(field))~field;		\
> >> })
> >
> > Umm, or not.
> >
> > It helps when I actually test the code not just type
> > it into an email client.
> >
> > 	!(field && (typeof(field))~field)
> 
> Or write it as (!field || !(typeof(field))~field) which more closely
> resembles what the macro name expresses.

Better still, or maybe:

	field == 0 || field == (typeof field)~0


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight July 2, 2012, 10:19 a.m. UTC | #7
> > Or write it as (!field || !(typeof(field))~field) which more closely
> > resembles what the macro name expresses.
> 
> Better still, or maybe:
> 
> 	field == 0 || field == (typeof field)~0

Which doesn't work when sizeof(field) > sizeof(int).
Needs another cast.

	field == 0 || field == (typeof field)~(typeof field)0

	David


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Schwab July 2, 2012, 11:35 a.m. UTC | #8
"David Laight" <David.Laight@ACULAB.COM> writes:

>  
>> > Or write it as (!field || !(typeof(field))~field) which more closely
>> > resembles what the macro name expresses.
>> 
>> Better still, or maybe:
>> 
>> 	field == 0 || field == (typeof field)~0
>
> Which doesn't work when sizeof(field) > sizeof(int).
> Needs another cast.
>
> 	field == 0 || field == (typeof field)~(typeof field)0

You can avoid that by using (typeof field)-1.

Andreas.
David Laight July 2, 2012, 12:15 p.m. UTC | #9
> "David Laight" <David.Laight@ACULAB.COM> writes:
> 
> >  
> >> > Or write it as (!field || !(typeof(field))~field) which more
closely
> >> > resembles what the macro name expresses.
> >> 
> >> Better still, or maybe:
> >> 
> >> 	field == 0 || field == (typeof field)~0
> >
> > Which doesn't work when sizeof(field) > sizeof(int).
> > Needs another cast.
> >
> > 	field == 0 || field == (typeof field)~(typeof field)0
> 
> You can avoid that by using (typeof field)-1.

Gah, I thought I knew the integral promotion rules!
-1 and ~0 are both 'integer' and get treated the same.

A quick test shows that gcc does sign extend when converting
32bit int to 64bit unsigned long long.
Which probably means that is required by the standard!

	David


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz July 2, 2012, 12:57 p.m. UTC | #10
On 7/1/2012 7:00 PM, Ben Hutchings wrote:
[...]

Hi Ben,

Thanks for the detailed feedback, see below some responses


> +		l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
> +		/* don't allow mask which isn't all 0 or 1 */
> +		if (not_all_zeros_or_all_ones(l4_mask->ip4src, __be32) ||
> +		    not_all_zeros_or_all_ones(l4_mask->ip4dst, __be32) ||
> +		    not_all_zeros_or_all_ones(l4_mask->psrc, __be16) ||
> +		    not_all_zeros_or_all_ones(l4_mask->pdst, __be16))
> +			return -EOPNOTSUPP;
>
> Again, here and in many further instances, the error code should be EINVAL.


OK, will fix to use EINVAL instead of EOPNOTSUPP here and else-where needed.


> +
> +static void add_ip_rule(struct mlx4_en_priv *priv,
> +			struct ethtool_rxnfc *cmd,
> +			struct list_head *list_h)
> +{
> +	struct mlx4_spec_list *spec_l3;
> +
> +	spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
> +	if (!spec_l3) {
> +		en_err(priv, "Fail to alloc ethtool rule.\n");
> +		return;
> +	}
>
> This should return an error code as well - logging is not a substitue.

OK, will do.

>
>
>> +	spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
>> +	spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
>> +	if (spec_l3->ipv4.src_ip)
>> +		spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
>> +	spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
>> +	if (spec_l3->ipv4.dst_ip)
>> +		spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
>
> The conditions should be using the mask (cmd->fs.m_u) not the value.

I'd like to clarify things here a bit - the way the code is written, is to

1st make sure that we can deal with the mask provided by the user in the 
ethtool
command, e.g its all zeroes or all ones (leaving a side for a minute the 
other
discussion on how would be best to impl. that check...) - this check is 
done
in mlx4_en_validate_flow

2nd initialize mlx4 flow spec which is all empty - see calls to kzalloc 
spec_l2/l3/l4

3rd import the non-zero values (not masks) from the ethtool command into 
the mlx4
flow specs, with FULL mask


Under this logic, we can use the values and not the masks, isn't that?



>
>
>> +static void add_tcp_udp_rule(struct mlx4_en_priv *priv,
>> +			     struct ethtool_rxnfc *cmd,
>> +			     struct list_head *list_h, int proto)
>> +{
>> +	struct mlx4_spec_list *spec_l3;
>> +	struct mlx4_spec_list *spec_l4;
>> +
>> +	spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
>> +	spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL);
>> +	if (!spec_l4 || !spec_l3) {
>> +		en_err(priv, "Fail to alloc ethtool rule.\n");
>
> If one of them was successfully allocated, it will now be leaked.

THANKS, will fix.

>> +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
>> +					     struct ethtool_rxnfc *cmd,
>> +					     struct list_head *rule_list_h)
>> +{
>> +	int err;
>> +	u64 mac;
>> +	__be64 be_mac;
>> +	struct ethhdr *eth_spec;
>> +	struct mlx4_en_priv *priv = netdev_priv(dev);
>> +	struct mlx4_spec_list *spec_l2;
>> +	__be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16);
>> +
>> +	err = mlx4_en_validate_flow(dev, cmd);
>> +	if (err)
>> +		return err;
>> +
>> +	spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
>> +	if (!spec_l2)
>> +		return -ENOMEM;
>> +
>> +	mac = priv->mac & EN_ETHTOOL_MAC_MASK;
>> +	be_mac = cpu_to_be64(mac << 16);
>> +
>> +	spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
>> +	memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
>> +	if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW)
>> +		memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN);
>
> Does the hardware require filtering by MAC address and not only by layer 3/4 addresses?

YES

>
>
>> +	if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) {
>> +		spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
>> +		spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff);
>
> This doesn't match mlx4_en_validate_flow(); you are replacing a mask of
> 0xffff with 0xfff.

I need to check how exactly this should be done here, vlan ID is only 12 
bits in size, so this is
probably the source for the 0xfff vs 0xffff


>
>
>> +	switch (cmd->fs.flow_type & ~FLOW_EXT) {
>> +	case ETHER_FLOW:
>> +		eth_spec = &cmd->fs.h_u.ether_spec;
>> +		memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN);
>> +		spec_l2->eth.ether_type = eth_spec->h_proto;
>> +		if (eth_spec->h_proto)
>> +			spec_l2->eth.ether_type_enable = 1;
>> +		break;
>> +	case IP_USER_FLOW:
>> +		add_ip_rule(priv, cmd, rule_list_h);
>> +		break;
>> +	case TCP_V4_FLOW:
>> +		add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
>> +		break;
>> +	case UDP_V4_FLOW:
>> +		add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
>> +		break;
>
> All those functions need to be able to return errors.


OK

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz July 2, 2012, 1:32 p.m. UTC | #11
On 7/1/2012 7:00 PM, Ben Hutchings wrote:
>> static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
>> >  			     u32 *rule_locs)
>> >  {
>> >  	struct mlx4_en_priv *priv = netdev_priv(dev);
>> >+	struct mlx4_en_dev *mdev = priv->mdev;
>> >  	int err = 0;
>> >+	int i = 0, priority = 0;
>> >+
>> >+	if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
>> >+		return -EOPNOTSUPP;
>> >  
>> >  	switch (cmd->cmd) {
>> >  	case ETHTOOL_GRXRINGS:
>> >  		cmd->data = priv->rx_ring_num;
>> >  		break;
>> >+	case ETHTOOL_GRXCLSRLCNT:
>> >+		cmd->rule_cnt = mlx4_en_get_num_flows(priv);
>> >+		break;
>> >+	case ETHTOOL_GRXCLSRULE:
>> >+		err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
>> >+		break;
>> >+	case ETHTOOL_GRXCLSRLALL:
>> >+		while (!err || err == -ENOENT) {
>> >+			err = mlx4_en_get_flow(dev, cmd, i);
>> >+			if (!err)
>> >+				((u32 *)(rule_locs))[priority++] = i;
> I don't see any range check against cmd->rule_cnt.

OK, will fix to make sure we don't cross cmd->rule_cnt

>
>
> Why are you casting rule_locs?

doesn't seem to be needed, will remove that casting

>
>
> Also, are the rules really prioritised by location, so that if rule 0
> and 1 match a packet then only rule 0 is applied?  If they are actually
> prioritised by the match type then you need to assign locations on the
> driver side that reflect the actual priorities.


Rules are prioritized by a scheme made of "domain" X location, see below 
and in patch #6
the MLX4_DOMAIN_yyy entries, higher domain value means lower priority, 
so for instance rules
placed by ethtool would have higher priority over rules added by the L2 
NIC  or by future RFS
patch. Within a domain, the location matters.

You can see that simple L2 rules (e.g contain dest-mac, possibly vlan) 
added by the driver
use the NIC domain, wheres those added to serve ethtool commands use the 
ETHTOOL one.

Within the ethtool domain, we maintain the priority as the location 
specified by the user, hope this explains
things.

> +enum {
> +	MLX4_DOMAIN_UVERBS	= 0x1000,
> +	MLX4_DOMAIN_ETHTOOL     = 0x2000,
> +	MLX4_DOMAIN_RFS         = 0x3000,
> +	MLX4_DOMAIN_NIC    = 0x5000,
> +};

>> >+			i++;
>> >+		}
>> >+		if (priority)
>> >+			err = 0;
> [...]
>
> But if there are no rules defined, this is an error?  That's not right.
> I think you should unconditionally set err = 0 here.

OK, will do.

Or.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings July 3, 2012, 1:47 a.m. UTC | #12
On Mon, 2012-07-02 at 15:57 +0300, Or Gerlitz wrote:
> On 7/1/2012 7:00 PM, Ben Hutchings wrote:
[...]
> >> +	spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
> >> +	spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
> >> +	if (spec_l3->ipv4.src_ip)
> >> +		spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
> >> +	spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
> >> +	if (spec_l3->ipv4.dst_ip)
> >> +		spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
> >
> > The conditions should be using the mask (cmd->fs.m_u) not the value.
> 
> I'd like to clarify things here a bit - the way the code is written, is to
> 
> 1st make sure that we can deal with the mask provided by the user in the 
> ethtool
> command, e.g its all zeroes or all ones (leaving a side for a minute the 
> other
> discussion on how would be best to impl. that check...) - this check is 
> done
> in mlx4_en_validate_flow
> 
> 2nd initialize mlx4 flow spec which is all empty - see calls to kzalloc 
> spec_l2/l3/l4
> 
> 3rd import the non-zero values (not masks) from the ethtool command into 
> the mlx4
> flow specs, with FULL mask
> 
> 
> Under this logic, we can use the values and not the masks, isn't that?

No, it's perfectly valid to specify a filter that matches, for example,
a destination IP address of 0.0.0.0 with mask of 255.255.255.255.  So
you really need to check the mask.  If your filter hardware doesn't
support zero values for some fields then you'll need to reject them in
mlx4_en_validate_flow.

[...]
> >> +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
> >> +					     struct ethtool_rxnfc *cmd,
> >> +					     struct list_head *rule_list_h)
> >> +{
> >> +	int err;
> >> +	u64 mac;
> >> +	__be64 be_mac;
> >> +	struct ethhdr *eth_spec;
> >> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> >> +	struct mlx4_spec_list *spec_l2;
> >> +	__be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16);
> >> +
> >> +	err = mlx4_en_validate_flow(dev, cmd);
> >> +	if (err)
> >> +		return err;
> >> +
> >> +	spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
> >> +	if (!spec_l2)
> >> +		return -ENOMEM;
> >> +
> >> +	mac = priv->mac & EN_ETHTOOL_MAC_MASK;
> >> +	be_mac = cpu_to_be64(mac << 16);
> >> +
> >> +	spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
> >> +	memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
> >> +	if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW)
> >> +		memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN);
> >
> > Does the hardware require filtering by MAC address and not only by layer 3/4 addresses?
> 
> YES

That's a pity.  Maybe the API should be extended so the driver can at
least report that the filter is narrower than requested.

> >> +	if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) {
> >> +		spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
> >> +		spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff);
> >
> > This doesn't match mlx4_en_validate_flow(); you are replacing a mask of
> > 0xffff with 0xfff.
> 
> I need to check how exactly this should be done here, vlan ID is only 12 
> bits in size, so this is
> probably the source for the 0xfff vs 0xffff
[...]

If the hardware can only match the VID then you need to validate that
the mask is either 0 or 0xfff instead of 0 or 0xffff.

Ben.
Ben Hutchings July 3, 2012, 1:50 a.m. UTC | #13
On Mon, 2012-07-02 at 16:32 +0300, Or Gerlitz wrote:
[...]
> > Also, are the rules really prioritised by location, so that if rule 0
> > and 1 match a packet then only rule 0 is applied?  If they are actually
> > prioritised by the match type then you need to assign locations on the
> > driver side that reflect the actual priorities.
> 
> 
> Rules are prioritized by a scheme made of "domain" X location, see below 
> and in patch #6
> the MLX4_DOMAIN_yyy entries, higher domain value means lower priority, 
> so for instance rules
> placed by ethtool would have higher priority over rules added by the L2 
> NIC  or by future RFS
> patch. Within a domain, the location matters.
> 
> You can see that simple L2 rules (e.g contain dest-mac, possibly vlan) 
> added by the driver
> use the NIC domain, wheres those added to serve ethtool commands use the 
> ETHTOOL one.
> 
> Within the ethtool domain, we maintain the priority as the location 
> specified by the user, hope this explains
> things.
[...]

Good, I didn't read the other patches but just wanted to make sure you
had thought about this.

Ben
Or Gerlitz July 3, 2012, 8:14 a.m. UTC | #14
On 7/2/2012 2:35 PM, Andreas Schwab wrote:
>
> 	field == 0 || field == (typeof field)~(typeof field)0
> You can avoid that by using (typeof field)-1.
>

OK, thanks everybody, we will take that path.

Or.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz July 3, 2012, 8:56 a.m. UTC | #15
On 7/3/2012 4:47 AM, Ben Hutchings wrote:
> If the hardware can only match the VID then you need to validate that
> the mask is either 0 or 0xfff instead of 0 or 0xffff.

sure, will fix

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz July 3, 2012, 8:58 a.m. UTC | #16
On 7/3/2012 4:47 AM, Ben Hutchings wrote:
>> Under this logic, we can use the values and not the masks, isn't that?
> No, it's perfectly valid to specify a filter that matches, for example,
> a destination IP address of 0.0.0.0 with mask of 255.255.255.255.  So
> you really need to check the mask.  If your filter hardware doesn't
> support zero values for some fields then you'll need to reject them in
> mlx4_en_validate_flow.

Got it, will change to use masks all over the place, as you pointed out 
we need to do.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz July 3, 2012, 9 a.m. UTC | #17
On 7/1/2012 7:00 PM, Ben Hutchings wrote:
>> +#define not_all_zeros_or_all_ones(field, type) \
>> >+	(field && (type)~field)
>> >+
>> >+static int mlx4_en_validate_flow(struct net_device *dev,
>> >+				 struct ethtool_rxnfc *cmd)
>> >+{
>> >+	struct ethtool_usrip4_spec *l3_mask;
>> >+	struct ethtool_tcpip4_spec *l4_mask;
>> >+	struct ethhdr *eth_mask;
>> >+	u64 full_mac = ~0ull;
>> >+	u64 zero_mac = 0;
>> >+
>> >+	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
>> >+		return -EINVAL;
>> >+
>> >+	switch (cmd->fs.flow_type & ~FLOW_EXT) {
>> >+	case TCP_V4_FLOW:
>> >+	case UDP_V4_FLOW:
>> >+		if (cmd->fs.h_u.tcp_ip4_spec.tos)
>> >+			return -EOPNOTSUPP;
> I suspect that your filter ignores TOS, rather than only matching TOS ==
> 0, so you should actually be checking the corresponding field in the
> mask (fs.m_u). [...]

OK, thanks for pointing this over, will fix.

> >+		break;
> >+	case IP_USER_FLOW:
> >+		l3_mask = &cmd->fs.m_u.usr_ip4_spec;
> >+		if (cmd->fs.h_u.usr_ip4_spec.l4_4_bytes ||
> >+		    cmd->fs.h_u.usr_ip4_spec.tos ||
> I think this should be checking l4_4_bytes and tos in the mask.

OK

>
>> >+		    cmd->fs.h_u.usr_ip4_spec.proto ||
>> >+		    cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
>> >+		    (!cmd->fs.h_u.usr_ip4_spec.ip4src &&
>> >+		     !cmd->fs.h_u.usr_ip4_spec.ip4dst) ||
>> >+		    not_all_zeros_or_all_ones(l3_mask->ip4src, __be32) ||
>> >+		    not_all_zeros_or_all_ones(l3_mask->ip4dst, __be32))
>> >+			return -EOPNOTSUPP;
>> >+		break;
>> >+	case ETHER_FLOW:
>> >+		eth_mask = &cmd->fs.m_u.ether_spec;
>> >+		if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN))
>> >+			return -EOPNOTSUPP;
>> >+		if (!memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN))
>> >+			return -EOPNOTSUPP;
> But in the next statement you test whether eth_mask->h_dest is either
> all-zeroes or all-ones.  Is all-zeroes valid or not?  I suspect you
> actually intend to reject the case where both h_dest and h_proto are masked out.

indeed, this code section can be better written, will fix for V1


>
>> >+		if (not_all_zeros_or_all_ones(eth_mask->h_proto, __be16) ||
>> >+		    (memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN) &&
>> >+		     memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)))
>> >+			return -EOPNOTSUPP;
>> >+		break;
>> >+	default:
>> >+		return -EOPNOTSUPP;
>> >+	}
>> >+
>> >+	if ((cmd->fs.flow_type & FLOW_EXT)) {
>> >+		if (cmd->fs.m_ext.vlan_etype ||
>> >+		    not_all_zeros_or_all_ones(cmd->fs.m_ext.vlan_tci,
>> >+					       __be16)) {
>> >+			return -EOPNOTSUPP;
>> >+		}
>> >+	}
>> >+
>> >+	return 0;
>> >+}


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 72901ce..30de264 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -38,6 +38,10 @@ 
 #include "mlx4_en.h"
 #include "en_port.h"
 
+#define EN_ETHTOOL_QP_ATTACH (1ull << 63)
+#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL
+#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
+#define EN_ETHTOOL_WORD_MASK  cpu_to_be32(0xffffffff)
 
 static void
 mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
@@ -599,16 +603,360 @@  static int mlx4_en_set_rxfh_indir(struct net_device *dev,
 	return err;
 }
 
+#define not_all_zeros_or_all_ones(field, type) \
+	(field && (type)~field)
+
+static int mlx4_en_validate_flow(struct net_device *dev,
+				 struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_usrip4_spec *l3_mask;
+	struct ethtool_tcpip4_spec *l4_mask;
+	struct ethhdr *eth_mask;
+	u64 full_mac = ~0ull;
+	u64 zero_mac = 0;
+
+	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+		return -EINVAL;
+
+	switch (cmd->fs.flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		if (cmd->fs.h_u.tcp_ip4_spec.tos)
+			return -EOPNOTSUPP;
+		l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+		/* don't allow mask which isn't all 0 or 1 */
+		if (not_all_zeros_or_all_ones(l4_mask->ip4src, __be32) ||
+		    not_all_zeros_or_all_ones(l4_mask->ip4dst, __be32) ||
+		    not_all_zeros_or_all_ones(l4_mask->psrc, __be16) ||
+		    not_all_zeros_or_all_ones(l4_mask->pdst, __be16))
+			return -EOPNOTSUPP;
+		break;
+	case IP_USER_FLOW:
+		l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+		if (cmd->fs.h_u.usr_ip4_spec.l4_4_bytes ||
+		    cmd->fs.h_u.usr_ip4_spec.tos ||
+		    cmd->fs.h_u.usr_ip4_spec.proto ||
+		    cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
+		    (!cmd->fs.h_u.usr_ip4_spec.ip4src &&
+		     !cmd->fs.h_u.usr_ip4_spec.ip4dst) ||
+		    not_all_zeros_or_all_ones(l3_mask->ip4src, __be32) ||
+		    not_all_zeros_or_all_ones(l3_mask->ip4dst, __be32))
+			return -EOPNOTSUPP;
+		break;
+	case ETHER_FLOW:
+		eth_mask = &cmd->fs.m_u.ether_spec;
+		if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN))
+			return -EOPNOTSUPP;
+		if (!memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN))
+			return -EOPNOTSUPP;
+		if (not_all_zeros_or_all_ones(eth_mask->h_proto, __be16) ||
+		    (memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN) &&
+		     memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if ((cmd->fs.flow_type & FLOW_EXT)) {
+		if (cmd->fs.m_ext.vlan_etype ||
+		    not_all_zeros_or_all_ones(cmd->fs.m_ext.vlan_tci,
+					       __be16)) {
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static void add_ip_rule(struct mlx4_en_priv *priv,
+			struct ethtool_rxnfc *cmd,
+			struct list_head *list_h)
+{
+	struct mlx4_spec_list *spec_l3;
+
+	spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
+	if (!spec_l3) {
+		en_err(priv, "Fail to alloc ethtool rule.\n");
+		return;
+	}
+
+	spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+	spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
+	if (spec_l3->ipv4.src_ip)
+		spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+	spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
+	if (spec_l3->ipv4.dst_ip)
+		spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+	list_add_tail(&spec_l3->list, list_h);
+}
+
+static void add_tcp_udp_rule(struct mlx4_en_priv *priv,
+			     struct ethtool_rxnfc *cmd,
+			     struct list_head *list_h, int proto)
+{
+	struct mlx4_spec_list *spec_l3;
+	struct mlx4_spec_list *spec_l4;
+
+	spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
+	spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL);
+	if (!spec_l4 || !spec_l3) {
+		en_err(priv, "Fail to alloc ethtool rule.\n");
+		return;
+	}
+
+	spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+
+	if (proto == TCP_V4_FLOW) {
+		spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
+		spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src;
+		spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst;
+		spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc;
+		spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst;
+	} else {
+		spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+		spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src;
+		spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst;
+		spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc;
+		spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst;
+	}
+
+	if (spec_l3->ipv4.src_ip)
+		spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+	if (spec_l3->ipv4.dst_ip)
+		spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+
+	if (spec_l4->tcp_udp.src_port)
+		spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK;
+	if (spec_l4->tcp_udp.dst_port)
+		spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK;
+
+	list_add_tail(&spec_l3->list, list_h);
+	list_add_tail(&spec_l4->list, list_h);
+}
+
+static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
+					     struct ethtool_rxnfc *cmd,
+					     struct list_head *rule_list_h)
+{
+	int err;
+	u64 mac;
+	__be64 be_mac;
+	struct ethhdr *eth_spec;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_spec_list *spec_l2;
+	__be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16);
+
+	err = mlx4_en_validate_flow(dev, cmd);
+	if (err)
+		return err;
+
+	spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
+	if (!spec_l2)
+		return -ENOMEM;
+
+	mac = priv->mac & EN_ETHTOOL_MAC_MASK;
+	be_mac = cpu_to_be64(mac << 16);
+
+	spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
+	memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
+	if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW)
+		memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN);
+
+	if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) {
+		spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
+		spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff);
+	}
+
+	list_add_tail(&spec_l2->list, rule_list_h);
+
+	switch (cmd->fs.flow_type & ~FLOW_EXT) {
+	case ETHER_FLOW:
+		eth_spec = &cmd->fs.h_u.ether_spec;
+		memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN);
+		spec_l2->eth.ether_type = eth_spec->h_proto;
+		if (eth_spec->h_proto)
+			spec_l2->eth.ether_type_enable = 1;
+		break;
+	case IP_USER_FLOW:
+		add_ip_rule(priv, cmd, rule_list_h);
+		break;
+	case TCP_V4_FLOW:
+		add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
+		break;
+	case UDP_V4_FLOW:
+		add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
+		break;
+	}
+	return 0;
+}
+
+static int mlx4_en_flow_replace(struct net_device *dev,
+				struct ethtool_rxnfc *cmd)
+{
+	int err;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct ethtool_flow_id *loc_rule;
+	struct mlx4_spec_list *spec, *tmp_spec;
+	u32 qpn;
+	u64 reg_id;
+
+	struct mlx4_net_trans_rule rule = {
+		.queue_mode = MLX4_NET_TRANS_Q_FIFO,
+		.exclusive = 0,
+		.allow_loopback = 1,
+		.promisc_mode = MLX4_FS_PROMISC_NONE,
+	};
+
+	rule.port = priv->port;
+	rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location;
+	INIT_LIST_HEAD(&rule.list);
+
+	/* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */
+	if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC)
+		return -EOPNOTSUPP;
+	else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
+		qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
+	} else {
+		if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
+			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n",
+				cmd->fs.ring_cookie);
+			return -EINVAL;
+		}
+		qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn;
+		if (!qpn) {
+			en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n",
+				cmd->fs.ring_cookie);
+			return -EINVAL;
+		}
+	}
+	rule.qpn = qpn;
+	err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list);
+	if (err)
+		goto out_free_list;
+
+	loc_rule = &priv->ethtool_rules[cmd->fs.location];
+	if (loc_rule->id) {
+		err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id);
+		if (err) {
+			en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n"
+			       , cmd->fs.location, loc_rule->id);
+			goto out_free_list;
+		}
+		loc_rule->id = 0;
+		memset(&loc_rule->flow_spec, 0,
+		       sizeof(struct ethtool_rx_flow_spec));
+	}
+	err = mlx4_flow_attach(priv->mdev->dev, &rule, &reg_id);
+	if (err) {
+		en_err(priv, "Fail to attach network rule at location %d.\n"
+		       , cmd->fs.location);
+		goto out_free_list;
+	}
+	loc_rule->id = reg_id;
+	memcpy(&loc_rule->flow_spec, &cmd->fs,
+	       sizeof(struct ethtool_rx_flow_spec));
+
+out_free_list:
+	list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
+		list_del(&spec->list);
+		kfree(spec);
+	}
+	return err;
+}
+
+static int mlx4_en_flow_detach(struct net_device *dev,
+			       struct ethtool_rxnfc *cmd)
+{
+	int err = 0;
+	struct ethtool_flow_id *rule;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+		return -EINVAL;
+
+	rule = &priv->ethtool_rules[cmd->fs.location];
+	if (!rule->id) {
+		err =  -ENOENT;
+		goto out;
+	}
+
+	err = mlx4_flow_detach(priv->mdev->dev, rule->id);
+	if (err) {
+		en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n"
+		       , cmd->fs.location, rule->id);
+		goto out;
+	}
+	rule->id = 0;
+	memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec));
+out:
+	return err;
+
+}
+
+static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			    int loc)
+{
+	int err = 0;
+	struct ethtool_flow_id *rule;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES)
+		return -EINVAL;
+
+	rule = &priv->ethtool_rules[loc];
+	if (rule->id)
+		memcpy(&cmd->fs, &rule->flow_spec,
+		       sizeof(struct ethtool_rx_flow_spec));
+	else
+		err = -ENOENT;
+
+	return err;
+}
+
+static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv)
+{
+
+	int i, res = 0;
+	for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) {
+		if (priv->ethtool_rules[i].id)
+			res++;
+	}
+	return res;
+
+}
+
 static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			     u32 *rule_locs)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
 	int err = 0;
+	int i = 0, priority = 0;
+
+	if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+		return -EOPNOTSUPP;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
 		cmd->data = priv->rx_ring_num;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = mlx4_en_get_num_flows(priv);
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		while (!err || err == -ENOENT) {
+			err = mlx4_en_get_flow(dev, cmd, i);
+			if (!err)
+				((u32 *)(rule_locs))[priority++] = i;
+			i++;
+		}
+		if (priority)
+			err = 0;
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -617,6 +965,30 @@  static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 	return err;
 }
 
+static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	int err = 0;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+		return -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		err = mlx4_en_flow_replace(dev, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		err = mlx4_en_flow_detach(dev, cmd);
+		break;
+	default:
+		en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd);
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
 const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.get_drvinfo = mlx4_en_get_drvinfo,
 	.get_settings = mlx4_en_get_settings,
@@ -637,6 +1009,7 @@  const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.get_ringparam = mlx4_en_get_ringparam,
 	.set_ringparam = mlx4_en_set_ringparam,
 	.get_rxnfc = mlx4_en_get_rxnfc,
+	.set_rxnfc = mlx4_en_set_rxnfc,
 	.get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
 	.get_rxfh_indir = mlx4_en_get_rxfh_indir,
 	.set_rxfh_indir = mlx4_en_set_rxfh_indir,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 2d6dabe..8882e70 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -75,6 +75,7 @@ 
 #define STAMP_SHIFT		31
 #define STAMP_VAL		0x7fffffff
 #define STATS_DELAY		(HZ / 4)
+#define MAX_NUM_OF_FS_RULES	256
 
 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */
 #define MAX_DESC_SIZE		512
@@ -435,6 +436,11 @@  struct mlx4_en_frag_info {
 
 #endif
 
+struct ethtool_flow_id {
+	struct ethtool_rx_flow_spec flow_spec;
+	u64 id;
+};
+
 struct mlx4_en_priv {
 	struct mlx4_en_dev *mdev;
 	struct mlx4_en_port_profile *prof;
@@ -444,6 +450,7 @@  struct mlx4_en_priv {
 	struct net_device_stats ret_stats;
 	struct mlx4_en_port_state port_state;
 	spinlock_t stats_lock;
+	struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES];
 
 	unsigned long last_moder_packets[MAX_RX_RINGS];
 	unsigned long last_moder_tx_packets;