af_packet: add interframe drop cmsg (v3)

Message ID
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Neil Horman Sept. 24, 2009, 7:30 p.m.
Ok, version 3, dropping the use of an allocated ring buffer in favor of
shuffling around the contents of skb->cb so we can fit the gap field in there,
as per previous conversation with Eric.

Add ancillary data to better represent loss information

I've had a few requests recently to provide more detail regarding frame loss
during an AF_PACKET packet capture session.  Specifically the requestors want to
see where in a packet sequence frames were lost, i.e. they want to see that 40
frames were lost between frames 302 and 303 in a packet capture file.  In order
to do this we need:

1) The kernel to export this data to user space
2) The applications to make use of it

This patch addresses item (1).  It does this by doing the following:

A) attaching ancillary data to any skb enqueued to a socket receive queue for
which frames were lost between it and the previously enqueued frame.  Note that
the skb->cb data block was already exhausted, so to fit this new bit of data in,
I reduced the origlen value from an unsigned int to an unsigned short.  I don't
think this will be problematic, as I can't imagine a datagram with a size of
65k.  Just in case, I've added checks to ensure that we get warnings if origlen
or gap overflow.

B) For any frame dequeued that has ancillary data in the ring buffer (as
determined by the correlator value), we add a cmsg structure to the msghdr that
gets copied to user space.  This cmsg structure is of cmsg_level AF_PACKET and
cmsg_type PACKET_GAPDATA.  It contains a u16 value which counts the number of
frames lost between the reception of the frame currently being received and the
frame most recently preceding it.  Note this creates a situation in which, if we
have packet loss followed immediately by a socket close operation, we might miss
some gap information.  This situation is covered by the use of the
PACKET_AUXINFO socket option, which provides total loss stats (from which the
final gap can be computed).

I've tested this patch myself, and it works well.

Signed-off-by: Neil Horman <>

 include/linux/if_packet.h |    2 ++
 net/packet/af_packet.c    |   44 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 2 deletions(-)

To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to
More majordomo info at


diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index dea7d6b..e5d200f 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -48,11 +48,13 @@  struct sockaddr_ll
 #define PACKET_RESERVE			12
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
+#define PACKET_GAPDATA			15
 struct tpacket_stats
 	unsigned int	tp_packets;
 	unsigned int	tp_drops;
+	unsigned int    tp_gap;
 struct tpacket_auxdata
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d3d52c6..0f97a96 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -207,7 +207,8 @@  struct packet_sock {
 struct packet_skb_cb {
-	unsigned int origlen;
+	unsigned short origlen;
+	unsigned short gap;
 	union {
 		struct sockaddr_pkt pkt;
 		struct sockaddr_ll ll;
@@ -524,6 +525,33 @@  static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+ * If we've lost frames since the last time we queued one to the
+ * sk_receive_queue, we need to record it here.
+ * This must be called under the protection of the socket lock
+ * to prevent racing with other softirqs and user space
+ */
+static inline void record_packet_gap(struct sk_buff *skb,
+					struct packet_sock *po)
+	PACKET_SKB_CB(skb)->gap = po->stats.tp_gap;
+	po->stats.tp_gap = 0;
+	return;
+static inline __u16 check_packet_gap(struct sk_buff *skb)
+	return PACKET_SKB_CB(skb)->gap;
+#define INC_GAP_STAT(po) do {\
+	if (likely(po->stats.tp_gap != (USHORT_MAX-1)))\
+		po->stats.tp_gap++;\
+	else if (net_ratelimit())\
+		pr_warning("gap stats overflowed on af_packet socket\n");\
+} while (0)
    This function makes lazy skb cloning in hope that most of packets
    are discarded by BPF.
@@ -612,7 +640,11 @@  static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
-	PACKET_SKB_CB(skb)->origlen = skb->len;
+	PACKET_SKB_CB(skb)->origlen = (unsigned short)skb->len;
+	if (unlikely((skb->len > PACKET_SKB_CB(skb)->origlen) &&
+	    net_ratelimit())) {
+		pr_warning("SKB len overflowed aux data for af_packet!\n");
+	}
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
@@ -626,6 +658,7 @@  static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
+	record_packet_gap(skb, po);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
@@ -634,6 +667,7 @@  static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -811,6 +845,7 @@  drop:
 	sk->sk_data_ready(sk, 0);
@@ -1418,6 +1453,7 @@  static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sk_buff *skb;
 	int copied, err;
 	struct sockaddr_ll *sll;
+	__u16 gap;
 	err = -EINVAL;
@@ -1496,6 +1532,10 @@  static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
+	gap = check_packet_gap(skb);
+	if (gap)
+		put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u16), &gap);
 	 *	Free or return the buffer as appropriate. Again this
 	 *	hides all the races and re-entrancy issues from us.