diff mbox series

[v2,net-next] packet: add sockopt to ignore outgoing packets

Message ID 20180903142336.32122-1-vincent.whitchurch@axis.com
State Accepted, archived
Delegated to: David Miller
Headers show
Series [v2,net-next] packet: add sockopt to ignore outgoing packets | expand

Commit Message

Vincent Whitchurch Sept. 3, 2018, 2:23 p.m. UTC
Currently, the only way to ignore outgoing packets on a packet socket is
via the BPF filter.  With MSG_ZEROCOPY, packets that are looped into
AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even
if the filter that is run from packet_rcv() would reject them.  So the
presence of a packet socket on the interface takes away the benefits of
MSG_ZEROCOPY, even if the packet socket is not interested in outgoing
packets.  (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily
cloned, but the cost for that is much lower.)

Add a socket option to allow AF_PACKET sockets to ignore outgoing
packets to solve this.  Note that the *BSDs already have something
similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT.

The first intended user is lldpd.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
v2: Stricter value validation.
    Moved ignore check out of skb_loop_sk().

 include/linux/netdevice.h      |  1 +
 include/uapi/linux/if_packet.h |  1 +
 net/core/dev.c                 |  3 +++
 net/packet/af_packet.c         | 17 +++++++++++++++++
 4 files changed, 22 insertions(+)

Comments

David Miller Sept. 6, 2018, 5:10 a.m. UTC | #1
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Mon,  3 Sep 2018 16:23:36 +0200

> Currently, the only way to ignore outgoing packets on a packet socket is
> via the BPF filter.  With MSG_ZEROCOPY, packets that are looped into
> AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even
> if the filter run from packet_rcv() would reject them.  So the presence
> of a packet socket on the interface takes away the benefits of
> MSG_ZEROCOPY, even if the packet socket is not interested in outgoing
> packets.  (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily
> cloned, but the cost for that is much lower.)
> 
> Add a socket option to allow AF_PACKET sockets to ignore outgoing
> packets to solve this.  Note that the *BSDs already have something
> similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT.
> 
> The first intended user is lldpd.
> 
> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
> ---
> v2: Stricter value validation.
>     Moved ignore check out of skb_loop_sk().

Applied, thank you.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca5ab98053c8..8ef14d9edc58 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2317,6 +2317,7 @@  static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
 
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
+	bool			ignore_outgoing;
 	struct net_device	*dev;	/* NULL is wildcarded here	     */
 	int			(*func) (struct sk_buff *,
 					 struct net_device *,
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 67b61d91d89b..467b654bd4c7 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -57,6 +57,7 @@  struct sockaddr_ll {
 #define PACKET_QDISC_BYPASS		20
 #define PACKET_ROLLOVER_STATS		21
 #define PACKET_FANOUT_DATA		22
+#define PACKET_IGNORE_OUTGOING		23
 
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
diff --git a/net/core/dev.c b/net/core/dev.c
index 325fc5088370..09dcf190c081 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1970,6 +1970,9 @@  void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_lock();
 again:
 	list_for_each_entry_rcu(ptype, ptype_list, list) {
+		if (ptype->ignore_outgoing)
+			continue;
+
 		/* Never send packets back to the socket
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5610061e7f2e..23336498eb9f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3805,6 +3805,20 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		return fanout_set_data(po, optval, optlen);
 	}
+	case PACKET_IGNORE_OUTGOING:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		if (val < 0 || val > 1)
+			return -EINVAL;
+
+		po->prot_hook.ignore_outgoing = !!val;
+		return 0;
+	}
 	case PACKET_TX_HAS_OFF:
 	{
 		unsigned int val;
@@ -3928,6 +3942,9 @@  static int packet_getsockopt(struct socket *sock, int level, int optname,
 			((u32)po->fanout->flags << 24)) :
 		       0);
 		break;
+	case PACKET_IGNORE_OUTGOING:
+		val = po->prot_hook.ignore_outgoing;
+		break;
 	case PACKET_ROLLOVER_STATS:
 		if (!po->rollover)
 			return -EINVAL;