[v6,04/12] net/mlx4_en: add support for fast rx drop bpf program

Message ID: 1467944124-14891-5-git-send-email-bblanco@plumgrid.com
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

Brenden Blanco July 8, 2016, 2:15 a.m. UTC
Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.

In tc/socket bpf programs, helpers linearize skb fragments as needed
when the program touchs the packet data. However, in the pursuit of
speed, XDP programs will not be allowed to use these slower functions,
especially if it involves allocating an skb.

Therefore, disallow MTU settings that would produce a multi-fragment
packet that XDP programs would fail to access. Future enhancements could
be done to increase the allowable MTU.

Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 38 ++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 36 +++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  5 ++++
 3 files changed, 75 insertions(+), 4 deletions(-)
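
For context, a minimal sketch of the kind of program this hook runs; it only
reads within the [xdp.data, xdp.data_end) window that the driver builds from
fragment 0, which is why the single-fragment MTU restriction above suffices.
The struct xdp_md context and the SEC() convention are assumed to match the
uapi and samples introduced earlier in this series.

/* Illustrative XDP program: drop frames too short to carry an Ethernet
 * header, pass everything else. All accesses are bounds-checked against
 * data_end, so the program never reads past the end of the frag.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>

#define SEC(NAME) __attribute__((section(NAME), used))

SEC("xdp")
int xdp_drop_runts(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	if (data + sizeof(struct ethhdr) > data_end)
		return XDP_DROP;
	return XDP_PASS;
}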

Comments

Or Gerlitz July 9, 2016, 2:07 p.m. UTC | #1
On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.
>
> In tc/socket bpf programs, helpers linearize skb fragments as needed
> when the program touchs the packet data. However, in the pursuit of

nit, for the next version touchs --> touches

> speed, XDP programs will not be allowed to use these slower functions,
> especially if it involves allocating an skb.


[...]

> @@ -835,6 +838,34 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
>                 l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
>                         (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
>
> +               /* A bpf program gets first chance to drop the packet. It may
> +                * read bytes but not past the end of the frag.
> +                */
> +               if (prog) {
> +                       struct xdp_buff xdp;
> +                       dma_addr_t dma;
> +                       u32 act;
> +
> +                       dma = be64_to_cpu(rx_desc->data[0].addr);
> +                       dma_sync_single_for_cpu(priv->ddev, dma,
> +                                               priv->frag_info[0].frag_size,
> +                                               DMA_FROM_DEVICE);
> +
> +                       xdp.data = page_address(frags[0].page) +
> +                                                       frags[0].page_offset;
> +                       xdp.data_end = xdp.data + length;
> +
> +                       act = bpf_prog_run_xdp(prog, &xdp);
> +                       switch (act) {
> +                       case XDP_PASS:
> +                               break;
> +                       default:
> +                               bpf_warn_invalid_xdp_action(act);
> +                       case XDP_DROP:
> +                               goto next;
> +                       }
> +               }


(probably a nit too, but wanted to make sure we don't miss something
here) is the default case preceding the DROP one on purpose? any
special reason to do that?
Saeed Mahameed July 9, 2016, 7:58 p.m. UTC | #2
On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.
>
> In tc/socket bpf programs, helpers linearize skb fragments as needed
> when the program touchs the packet data. However, in the pursuit of
> speed, XDP programs will not be allowed to use these slower functions,
> especially if it involves allocating an skb.
>
> Therefore, disallow MTU settings that would produce a multi-fragment
> packet that XDP programs would fail to access. Future enhancements could
> be done to increase the allowable MTU.
>
> Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 38 ++++++++++++++++++++++++++
>  drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 36 +++++++++++++++++++++---
>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  5 ++++
>  3 files changed, 75 insertions(+), 4 deletions(-)
>
[...]
> +               /* A bpf program gets first chance to drop the packet. It may
> +                * read bytes but not past the end of the frag.
> +                */
> +               if (prog) {
> +                       struct xdp_buff xdp;
> +                       dma_addr_t dma;
> +                       u32 act;
> +
> +                       dma = be64_to_cpu(rx_desc->data[0].addr);
> +                       dma_sync_single_for_cpu(priv->ddev, dma,
> +                                               priv->frag_info[0].frag_size,
> +                                               DMA_FROM_DEVICE);

In case of XDP_PASS we will dma_sync again in the normal path, this
can be improved by doing the dma_sync as soon as we can and once and
for all, regardless of the path the packet is going to take
(XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).

> +
> +                       xdp.data = page_address(frags[0].page) +
> +                                                       frags[0].page_offset;
> +                       xdp.data_end = xdp.data + length;
> +
> +                       act = bpf_prog_run_xdp(prog, &xdp);
> +                       switch (act) {
> +                       case XDP_PASS:
> +                               break;
> +                       default:
> +                               bpf_warn_invalid_xdp_action(act);
> +                       case XDP_DROP:
> +                               goto next;

The drop action here (goto next) will release the current rx_desc
buffers and use new ones to refill. I know that the mlx4 rx scheme
will release/allocate new pages only once every ~32 packets, but one
improvement that could really help here, especially for XDP_DROP
benchmarks, is to reuse the current rx_desc buffers when the packet is
going to be dropped.

Considering that the mlx4 rx buffer scheme doesn't allow gaps, maybe
this can be added later as a future improvement for the whole mlx4 rx
data path's drop decisions.
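
A rough sketch of the reuse being suggested, assuming a small per-ring cache
of frames that the refill path can pull from; mlx4_en_rx_recycle does show up
in later patches of this series (see Brenden's reply below), but the
page_cache fields here are purely illustrative.

/* Illustrative only: on XDP_DROP, stash the descriptor's frag in a small
 * per-ring cache so the refill path can reuse it instead of allocating.
 * ring->page_cache and MLX4_EN_CACHE_SIZE do not exist in this patch.
 */
static bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
			       struct mlx4_en_rx_alloc *frame)
{
	struct mlx4_en_page_cache *cache = &ring->page_cache;

	if (cache->index >= MLX4_EN_CACHE_SIZE)
		return false;	/* cache full, fall back to normal release */

	cache->buf[cache->index++] = *frame;
	return true;
}
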
Or Gerlitz July 9, 2016, 9:37 p.m. UTC | #3
On Sat, Jul 9, 2016 at 10:58 PM, Saeed Mahameed
<saeedm@dev.mellanox.co.il> wrote:
> On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
>> Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.
>>
>> In tc/socket bpf programs, helpers linearize skb fragments as needed
>> when the program touchs the packet data. However, in the pursuit of
>> speed, XDP programs will not be allowed to use these slower functions,
>> especially if it involves allocating an skb.
>>
>> Therefore, disallow MTU settings that would produce a multi-fragment
>> packet that XDP programs would fail to access. Future enhancements could
>> be done to increase the allowable MTU.
>>
>> Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
>> ---
>>  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 38 ++++++++++++++++++++++++++
>>  drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 36 +++++++++++++++++++++---
>>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  5 ++++
>>  3 files changed, 75 insertions(+), 4 deletions(-)
>>
> [...]
>> +               /* A bpf program gets first chance to drop the packet. It may
>> +                * read bytes but not past the end of the frag.
>> +                */
>> +               if (prog) {
>> +                       struct xdp_buff xdp;
>> +                       dma_addr_t dma;
>> +                       u32 act;
>> +
>> +                       dma = be64_to_cpu(rx_desc->data[0].addr);
>> +                       dma_sync_single_for_cpu(priv->ddev, dma,
>> +                                               priv->frag_info[0].frag_size,
>> +                                               DMA_FROM_DEVICE);
>
> In case of XDP_PASS we will dma_sync again in the normal path,

yep, correct

> this can be improved by doing the dma_sync as soon as we can and once and
> for all, regardless of the path the packet is going to take
> (XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).

how would you envision this being done in a not-very-ugly way?


>> +
>> +                       xdp.data = page_address(frags[0].page) +
>> +                                                       frags[0].page_offset;
>> +                       xdp.data_end = xdp.data + length;
>> +
>> +                       act = bpf_prog_run_xdp(prog, &xdp);
>> +                       switch (act) {
>> +                       case XDP_PASS:
>> +                               break;
>> +                       default:
>> +                               bpf_warn_invalid_xdp_action(act);
>> +                       case XDP_DROP:
>> +                               goto next;
>
> The drop action here (goto next) will release the current rx_desc
> buffers and use new ones to refill. I know that the mlx4 rx scheme
> will release/allocate new pages only once every ~32 packets, but one
> improvement that could really help here, especially for XDP_DROP
> benchmarks, is to reuse the current rx_desc buffers when the packet is
> going to be dropped.

> Considering that the mlx4 rx buffer scheme doesn't allow gaps, maybe
> this can be added later as a future improvement for the whole mlx4 rx
> data path's drop decisions.

yes, I think it makes sense to look on this as future improvement.
Tariq Toukan July 10, 2016, 3:25 p.m. UTC | #4
On 09/07/2016 10:58 PM, Saeed Mahameed wrote:
> On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
>> +               /* A bpf program gets first chance to drop the packet. It may
>> +                * read bytes but not past the end of the frag.
>> +                */
>> +               if (prog) {
>> +                       struct xdp_buff xdp;
>> +                       dma_addr_t dma;
>> +                       u32 act;
>> +
>> +                       dma = be64_to_cpu(rx_desc->data[0].addr);
>> +                       dma_sync_single_for_cpu(priv->ddev, dma,
>> +                                               priv->frag_info[0].frag_size,
>> +                                               DMA_FROM_DEVICE);
> In case of XDP_PASS we will dma_sync again in the normal path, this
> can be improved by doing the dma_sync as soon as we can and once and
> for all, regardless of the path the packet is going to take
> (XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).
I agree with Saeed, dma_sync is a heavy operation that is now done twice 
for all packets with XDP_PASS.
We should try our best to avoid performance degradation in the flow of 
unfiltered packets.

Regards,
Tariq
Brenden Blanco July 10, 2016, 3:40 p.m. UTC | #5
On Sat, Jul 09, 2016 at 05:07:36PM +0300, Or Gerlitz wrote:
> On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> > Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.
> >
> > In tc/socket bpf programs, helpers linearize skb fragments as needed
> > when the program touchs the packet data. However, in the pursuit of
> 
> nit, for the next version touchs --> touches
Will fix.
> 
[...]
> > +                       switch (act) {
> > +                       case XDP_PASS:
> > +                               break;
> > +                       default:
> > +                               bpf_warn_invalid_xdp_action(act);
> > +                       case XDP_DROP:
> > +                               goto next;
> > +                       }
> > +               }
> 
> 
> (probably a nit too, but wanted to make sure we don't miss something
> here) is the default case preceding the DROP one on purpose? any
> special reason to do that?
This is intentional, and legal though unconventional C. Without this
order, the later patches end up with a bit too much copy/paste for my
liking, as in:

case XDP_DROP:
        if (mlx4_en_rx_recycle(ring, frags))
                goto consumed;
        goto next;
default:
        bpf_warn_invalid_xdp_action(act);
        if (mlx4_en_rx_recycle(ring, frags))
                goto consumed;
        goto next;
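
For comparison, a sketch of what the fall-through ordering buys once
mlx4_en_rx_recycle (from those later patches) is in place: the unknown-action
case only adds the warning and then shares the drop/recycle path.

switch (act) {
case XDP_PASS:
	break;
default:
	bpf_warn_invalid_xdp_action(act);
	/* fall through to the shared drop path */
case XDP_DROP:
	if (mlx4_en_rx_recycle(ring, frags))
		goto consumed;
	goto next;
}
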
Brenden Blanco July 10, 2016, 4:05 p.m. UTC | #6
On Sun, Jul 10, 2016 at 06:25:40PM +0300, Tariq Toukan wrote:
> 
> On 09/07/2016 10:58 PM, Saeed Mahameed wrote:
> >On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> >>+               /* A bpf program gets first chance to drop the packet. It may
> >>+                * read bytes but not past the end of the frag.
> >>+                */
> >>+               if (prog) {
> >>+                       struct xdp_buff xdp;
> >>+                       dma_addr_t dma;
> >>+                       u32 act;
> >>+
> >>+                       dma = be64_to_cpu(rx_desc->data[0].addr);
> >>+                       dma_sync_single_for_cpu(priv->ddev, dma,
> >>+                                               priv->frag_info[0].frag_size,
> >>+                                               DMA_FROM_DEVICE);
> >In case of XDP_PASS we will dma_sync again in the normal path, this
> >can be improved by doing the dma_sync as soon as we can and once and
> >for all, regardless of the path the packet is going to take
> >(XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).
> I agree with Saeed, dma_sync is a heavy operation that is now done
> twice for all packets with XDP_PASS.
> We should try our best to avoid performance degradation in the flow
> of unfiltered packets.
Makes sense, do folks here see a way to do this cleanly?
> 
> Regards,
> Tariq
Tariq Toukan July 10, 2016, 4:38 p.m. UTC | #7
>>> +                       switch (act) {
>>> +                       case XDP_PASS:
>>> +                               break;
>>> +                       default:
>>> +                               bpf_warn_invalid_xdp_action(act);
>>> +                       case XDP_DROP:
>>> +                               goto next;
>>> +                       }
>>> +               }
>>
>> (probably a nit too, but wanted to make sure we don't miss something
>> here) is the default case preceding the DROP one on purpose? any
>> special reason to do that?
> This is intentional, and legal though unconventional C. Without this
> order, the later patches end up with a bit too much copy/paste for my
> liking, as in:
>
> case XDP_DROP:
>          if (mlx4_en_rx_recycle(ring, frags))
>                  goto consumed;
>          goto next;
> default:
>          bpf_warn_invalid_xdp_action(act);
>          if (mlx4_en_rx_recycle(ring, frags))
>                  goto consumed;
>          goto next;
The more critical issue here is the default action. All packets that get
an unknown/unsupported filter classification will be dropped.
I think XDP_PASS is the better choice here as the default action; there
are good reasons why it should be, as Jesper already explained in
replies to another patch in the series.

Regards,
Tariq
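
A sketch of the alternative ordering Tariq is arguing for, where an unknown
action warns and then falls through to PASS rather than DROP; this is only an
illustration of his suggestion, not what the posted patch does.

switch (act) {
case XDP_DROP:
	goto next;
default:
	bpf_warn_invalid_xdp_action(act);
	/* fall through: unknown actions behave like XDP_PASS */
case XDP_PASS:
	break;
}
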
Saeed Mahameed July 11, 2016, 11:48 a.m. UTC | #8
On Sun, Jul 10, 2016 at 7:05 PM, Brenden Blanco <bblanco@plumgrid.com> wrote:
> On Sun, Jul 10, 2016 at 06:25:40PM +0300, Tariq Toukan wrote:
>>
>> On 09/07/2016 10:58 PM, Saeed Mahameed wrote:
>> >On Fri, Jul 8, 2016 at 5:15 AM, Brenden Blanco <bblanco@plumgrid.com> wrote:
>> >>+               /* A bpf program gets first chance to drop the packet. It may
>> >>+                * read bytes but not past the end of the frag.
>> >>+                */
>> >>+               if (prog) {
>> >>+                       struct xdp_buff xdp;
>> >>+                       dma_addr_t dma;
>> >>+                       u32 act;
>> >>+
>> >>+                       dma = be64_to_cpu(rx_desc->data[0].addr);
>> >>+                       dma_sync_single_for_cpu(priv->ddev, dma,
>> >>+                                               priv->frag_info[0].frag_size,
>> >>+                                               DMA_FROM_DEVICE);
>> >In case of XDP_PASS we will dma_sync again in the normal path, this
>> >can be improved by doing the dma_sync as soon as we can and once and
>> >for all, regardless of the path the packet is going to take
>> >(XDP_DROP/mlx4_en_complete_rx_desc/mlx4_en_rx_skb).
>> I agree with Saeed, dma_sync is a heavy operation that is now done
>> twice for all packets with XDP_PASS.
>> We should try our best to avoid performance degradation in the flow
>> of unfiltered packets.
> Makes sense, do folks here see a way to do this cleanly?

yes, we need something like:

+static inline void
+mlx4_en_sync_dma(struct mlx4_en_priv *priv,
+                struct mlx4_en_rx_desc *rx_desc,
+                int length)
+{
+       dma_addr_t dma;
+       int nr;
+
+       /* Sync dma addresses from HW descriptor */
+       for (nr = 0; nr < priv->num_frags; nr++) {
+               struct mlx4_en_frag_info *frag_info = &priv->frag_info[nr];
+
+               if (length <= frag_info->frag_prefix_size)
+                       break;
+
+               dma = be64_to_cpu(rx_desc->data[nr].addr);
+               dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
+                                       DMA_FROM_DEVICE);
+       }
+}


@@ -790,6 +808,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
struct mlx4_en_cq *cq, int bud
                        goto next;
                }

+               length = be32_to_cpu(cqe->byte_cnt);
+               length -= ring->fcs_del;
+
+               mlx4_en_sync_dma(priv, rx_desc, length);
                 /* data is available continue processing the packet */

and make sure to remove all explicit dma_sync_single_for_cpu calls.
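
If the sync is hoisted that way, the hook in mlx4_en_process_rx_cq would
shrink to roughly the sketch below (same names as the patch, minus the
per-hook dma_sync); whether the extra sync cost on the drop path is
acceptable is discussed in the next reply.

		/* Sketch: XDP hook with the dma_sync hoisted into mlx4_en_sync_dma() */
		if (prog) {
			struct xdp_buff xdp;
			u32 act;

			xdp.data = page_address(frags[0].page) +
							frags[0].page_offset;
			xdp.data_end = xdp.data + length;

			act = bpf_prog_run_xdp(prog, &xdp);
			switch (act) {
			case XDP_PASS:
				break;
			default:
				bpf_warn_invalid_xdp_action(act);
			case XDP_DROP:
				goto next;
			}
		}
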
Brenden Blanco July 11, 2016, 9:49 p.m. UTC | #9
On Mon, Jul 11, 2016 at 02:48:17PM +0300, Saeed Mahameed wrote:
[...]
> 
> yes, we need something like:
> 
> +static inline void
> +mlx4_en_sync_dma(struct mlx4_en_priv *priv,
> +                struct mlx4_en_rx_desc *rx_desc,
> +                int length)
> +{
> +       dma_addr_t dma;
> +       int nr;
> +
> +       /* Sync dma addresses from HW descriptor */
> +       for (nr = 0; nr < priv->num_frags; nr++) {
> +               struct mlx4_en_frag_info *frag_info = &priv->frag_info[nr];
> +
> +               if (length <= frag_info->frag_prefix_size)
> +                       break;
> +
> +               dma = be64_to_cpu(rx_desc->data[nr].addr);
> +               dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
> +                                       DMA_FROM_DEVICE);
> +       }
> +}
> 
> 
> @@ -790,6 +808,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
> struct mlx4_en_cq *cq, int bud
>                         goto next;
>                 }
> 
> +               length = be32_to_cpu(cqe->byte_cnt);
> +               length -= ring->fcs_del;
> +
> +               mlx4_en_sync_dma(priv, rx_desc, length);
>                  /* data is available continue processing the packet */
> 
> and make sure to remove all explicit dma_sync_single_for_cpu calls.

I see. At first glance, this may work, but introduces some changes in
the driver that may be unwanted. For instance, the dma sync cost is now
being paid even in the case where no skb will be allocated. So, under
memory pressure, it might cause extra work which would slow down your
ability to recover from the stress.

Let's keep discussing it, but in the context of a standalone cleanup.

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 6083775..5c6b1a0c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -31,6 +31,7 @@ 
  *
  */
 
+#include <linux/bpf.h>
 #include <linux/etherdevice.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
@@ -2084,6 +2085,9 @@  void mlx4_en_destroy_netdev(struct net_device *dev)
 	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
 		mlx4_en_remove_timestamp(mdev);
 
+	if (priv->prog)
+		bpf_prog_put(priv->prog);
+
 	/* Detach the netdev so tasks would not attempt to access it */
 	mutex_lock(&mdev->state_lock);
 	mdev->pndev[priv->port] = NULL;
@@ -2112,6 +2116,11 @@  static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 		en_err(priv, "Bad MTU size:%d.\n", new_mtu);
 		return -EPERM;
 	}
+	if (priv->prog && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
+		en_err(priv, "MTU size:%d requires frags but bpf prog running",
+		       new_mtu);
+		return -EOPNOTSUPP;
+	}
 	dev->mtu = new_mtu;
 
 	if (netif_running(dev)) {
@@ -2520,6 +2529,31 @@  static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m
 	return err;
 }
 
+static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+
+	if (priv->num_frags > 1)
+		return -EOPNOTSUPP;
+
+	/* This xchg is paired with READ_ONCE in the fast path, but is
+	 * also protected from itself via rtnl lock
+	 */
+	old_prog = xchg(&priv->prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+static bool mlx4_xdp_attached(struct net_device *dev)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	return !!READ_ONCE(priv->prog);
+}
+
 static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_open		= mlx4_en_open,
 	.ndo_stop		= mlx4_en_close,
@@ -2548,6 +2582,8 @@  static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_udp_tunnel_del	= mlx4_en_del_vxlan_port,
 	.ndo_features_check	= mlx4_en_features_check,
 	.ndo_set_tx_maxrate	= mlx4_en_set_tx_maxrate,
+	.ndo_xdp_set		= mlx4_xdp_set,
+	.ndo_xdp_attached	= mlx4_xdp_attached,
 };
 
 static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2584,6 +2620,8 @@  static const struct net_device_ops mlx4_netdev_ops_master = {
 	.ndo_udp_tunnel_del	= mlx4_en_del_vxlan_port,
 	.ndo_features_check	= mlx4_en_features_check,
 	.ndo_set_tx_maxrate	= mlx4_en_set_tx_maxrate,
+	.ndo_xdp_set		= mlx4_xdp_set,
+	.ndo_xdp_attached	= mlx4_xdp_attached,
 };
 
 struct mlx4_en_bond {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c1b3a9c..2bf3d62 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -743,6 +743,7 @@  int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
 	struct mlx4_en_rx_alloc *frags;
 	struct mlx4_en_rx_desc *rx_desc;
+	struct bpf_prog *prog;
 	struct sk_buff *skb;
 	int index;
 	int nr;
@@ -759,6 +760,8 @@  int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	if (budget <= 0)
 		return polled;
 
+	prog = READ_ONCE(priv->prog);
+
 	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
@@ -835,6 +838,34 @@  int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
 			(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
+		/* A bpf program gets first chance to drop the packet. It may
+		 * read bytes but not past the end of the frag.
+		 */
+		if (prog) {
+			struct xdp_buff xdp;
+			dma_addr_t dma;
+			u32 act;
+
+			dma = be64_to_cpu(rx_desc->data[0].addr);
+			dma_sync_single_for_cpu(priv->ddev, dma,
+						priv->frag_info[0].frag_size,
+						DMA_FROM_DEVICE);
+
+			xdp.data = page_address(frags[0].page) +
+							frags[0].page_offset;
+			xdp.data_end = xdp.data + length;
+
+			act = bpf_prog_run_xdp(prog, &xdp);
+			switch (act) {
+			case XDP_PASS:
+				break;
+			default:
+				bpf_warn_invalid_xdp_action(act);
+			case XDP_DROP:
+				goto next;
+			}
+		}
+
 		if (likely(dev->features & NETIF_F_RXCSUM)) {
 			if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
 						      MLX4_CQE_STATUS_UDP)) {
@@ -1062,10 +1093,7 @@  static const int frag_sizes[] = {
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
-	 * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
-	 */
-	int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN);
+	int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
 	int buf_size = 0;
 	int i = 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d39bf59..35ecfa2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -164,6 +164,10 @@  enum {
 #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
 
 #define MLX4_EN_MIN_MTU		46
+/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
+ * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
+ */
+#define MLX4_EN_EFF_MTU(mtu)	((mtu) + ETH_HLEN + (2 * VLAN_HLEN))
 #define ETH_BCAST		0xffffffffffffULL
 
 #define MLX4_EN_LOOPBACK_RETRIES	5
@@ -590,6 +594,7 @@  struct mlx4_en_priv {
 	struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
 	struct hwtstamp_config hwtstamp_config;
 	u32 counter_index;
+	struct bpf_prog *prog;
 
 #ifdef CONFIG_MLX4_EN_DCB
 #define MLX4_EN_DCB_ENABLED	0x3