diff mbox

[RESEND] packet: Deliver VLAN TPID to userspace

Message ID 87r4bdk8c3.wl%atzm@stratosphere.co.jp
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Atzm Watanabe Oct. 22, 2013, 8:39 a.m. UTC
After the 802.1AD support, userspace packet receivers
(packet dumper, software switch, and the like) need how to know
VLAN TPID in order to reassemble original tagged frame.

Signed-off-by: Atzm Watanabe <atzm@stratosphere.co.jp>
---
struct tpacket_hdr_variant1 looks like that is allowed to grow,
as the length combined with struct tpacket3_hdr is explicit at
run-time.

 include/uapi/linux/if_packet.h | 5 +++--
 net/packet/af_packet.c         | 8 ++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

Comments

David Miller Oct. 25, 2013, 10:59 p.m. UTC | #1
From: Atzm Watanabe <atzm@stratosphere.co.jp>
Date: Tue, 22 Oct 2013 17:39:40 +0900

>  struct tpacket_hdr_variant1 {
>  	__u32	tp_rxhash;
>  	__u32	tp_vlan_tci;
> +	__u32	tp_vlan_tpid;
>  };
>  

You cannot do this, the header length is not variable.

This patch has been submitted several times, each of which you
have been shown ways in which your changes break userspace in
one way or another.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings Oct. 28, 2013, 8:11 p.m. UTC | #2
On Fri, 2013-10-25 at 18:59 -0400, David Miller wrote:
> From: Atzm Watanabe <atzm@stratosphere.co.jp>
> Date: Tue, 22 Oct 2013 17:39:40 +0900
> 
> >  struct tpacket_hdr_variant1 {
> >  	__u32	tp_rxhash;
> >  	__u32	tp_vlan_tci;
> > +	__u32	tp_vlan_tpid;
> >  };
> >  
> 
> You cannot do this, the header length is not variable.

Well it is variable to an extent.  This is used as a member of the final
anonymous union member of struct tpacket3_hdr, and the latter is
specified to be tail-padded to a multiple of 16 bytes
(TPACKET_ALIGNMENT).  Since its current size is 36 bytes, I think it can
safely grow by 12 bytes, so long as userland doesn't depend on
getsockopt(..., PACKET_HDRLEN, ...) returning only specific values.

Possibly there should be a separate struct tpacket_hdr_variant2 which
includes the extra member.  Possibly there should also be a status flag
to indicate that this member is present.

> This patch has been submitted several times, each of which you
> have been shown ways in which your changes break userspace in
> one way or another.

I think we established that struct tpacket3_hdr can't grow arbitrarily,
but not that it can't grow at all.

Ben.
Atzm Watanabe Oct. 30, 2013, 7:03 a.m. UTC | #3
At Mon, 28 Oct 2013 20:11:14 +0000,
Ben Hutchings wrote:
> 
> On Fri, 2013-10-25 at 18:59 -0400, David Miller wrote:
> > From: Atzm Watanabe <atzm@stratosphere.co.jp>
> > Date: Tue, 22 Oct 2013 17:39:40 +0900
> > 
> > >  struct tpacket_hdr_variant1 {
> > >  	__u32	tp_rxhash;
> > >  	__u32	tp_vlan_tci;
> > > +	__u32	tp_vlan_tpid;
> > >  };
> > >  
> > 
> > You cannot do this, the header length is not variable.
> 
> Well it is variable to an extent.  This is used as a member of the final
> anonymous union member of struct tpacket3_hdr, and the latter is
> specified to be tail-padded to a multiple of 16 bytes
> (TPACKET_ALIGNMENT).  Since its current size is 36 bytes, I think it can
> safely grow by 12 bytes, so long as userland doesn't depend on
> getsockopt(..., PACKET_HDRLEN, ...) returning only specific values.
> 
> Possibly there should be a separate struct tpacket_hdr_variant2 which
> includes the extra member.  Possibly there should also be a status flag
> to indicate that this member is present.

Should it be structures as below?

struct tpacket_hdr_variant1 {
        __u32   tp_rxhash;
        __u32   tp_vlan_tci;
};

struct tpacket_hdr_variant2 {
        __u32   tp_rxhash;
        __u32   tp_vlan_tci;
        __u32   tp_vlan_tpid;
};

struct tpacket3_hdr {
        __u32           tp_next_offset;
        __u32           tp_sec;
        __u32           tp_nsec;
        __u32           tp_snaplen;
        __u32           tp_len;
        __u32           tp_status;
        __u16           tp_mac;
        __u16           tp_net;
        /* pkt_hdr variants */
        union {
                struct tpacket_hdr_variant1 hv1;
                struct tpacket_hdr_variant2 hv2;
        };
};

If it's ok, I'd like to send the patch v2.


> > This patch has been submitted several times, each of which you
> > have been shown ways in which your changes break userspace in
> > one way or another.
> 
> I think we established that struct tpacket3_hdr can't grow arbitrarily,
> but not that it can't grow at all.

I think so, too.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings Nov. 4, 2013, 5:16 p.m. UTC | #4
On Wed, 2013-10-30 at 16:03 +0900, Atzm Watanabe wrote:
[...]
> Should it be structures as below?
>
> struct tpacket_hdr_variant1 {
>         __u32   tp_rxhash;
>         __u32   tp_vlan_tci;
> };
> 
> struct tpacket_hdr_variant2 {
>         __u32   tp_rxhash;
>         __u32   tp_vlan_tci;
>         __u32   tp_vlan_tpid;
> };
> 
> struct tpacket3_hdr {
>         __u32           tp_next_offset;
>         __u32           tp_sec;
>         __u32           tp_nsec;
>         __u32           tp_snaplen;
>         __u32           tp_len;
>         __u32           tp_status;
>         __u16           tp_mac;
>         __u16           tp_net;
>         /* pkt_hdr variants */
>         union {
>                 struct tpacket_hdr_variant1 hv1;
>                 struct tpacket_hdr_variant2 hv2;
>         };
> };
> 
> If it's ok, I'd like to send the patch v2.
[...]

I think this makes sense.

Also add a BUILD_BUG_ON(TPACKET3_HDRLEN != 48) somewhere.

Ben.
Daniel Borkmann Nov. 4, 2013, 6:33 p.m. UTC | #5
On 11/04/2013 06:16 PM, Ben Hutchings wrote:
> On Wed, 2013-10-30 at 16:03 +0900, Atzm Watanabe wrote:
> [...]
>> Should it be structures as below?
>>
>> struct tpacket_hdr_variant1 {
>>          __u32   tp_rxhash;
>>          __u32   tp_vlan_tci;
>> };
>>
>> struct tpacket_hdr_variant2 {
>>          __u32   tp_rxhash;
>>          __u32   tp_vlan_tci;
>>          __u32   tp_vlan_tpid;
>> };
>>
>> struct tpacket3_hdr {
>>          __u32           tp_next_offset;
>>          __u32           tp_sec;
>>          __u32           tp_nsec;
>>          __u32           tp_snaplen;
>>          __u32           tp_len;
>>          __u32           tp_status;
>>          __u16           tp_mac;
>>          __u16           tp_net;
>>          /* pkt_hdr variants */
>>          union {
>>                  struct tpacket_hdr_variant1 hv1;
>>                  struct tpacket_hdr_variant2 hv2;
>>          };
>> };
>>
>> If it's ok, I'd like to send the patch v2.
> [...]
>
> I think this makes sense.
>
> Also add a BUILD_BUG_ON(TPACKET3_HDRLEN != 48) somewhere.

Sorry for coming late into this discussion.

I think the packet_mmap API with its 3 different API versions
already is a "bit" terrible, and should only be further extended
for user space if there is a _*huge*_ (!) improvement somewhere
(e.g. in terms of packet capturing/transmission performance) that
would justify it. I'm thinking that this could e.g. be in terms
of packet capturing and transmission performance.

Otherwise, each time we want to add a new member, we add yet
another subheader for userspace into tpacket, making it even
more complicated to use, and adding more "legacy" API fragments?

To solve your problem, why don't you add a socket option that,
if _enabled_, would reconstruct the vlan header as it was seen
on the "wire" and push that up to userspace, just like libpcap
does internally. It's not perfect either, but perhaps a better
way to go. What do you think?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Atzm Watanabe Nov. 7, 2013, 5:22 p.m. UTC | #6
At Mon, 04 Nov 2013 19:33:52 +0100,
Daniel Borkmann wrote:
> 
> On 11/04/2013 06:16 PM, Ben Hutchings wrote:
> > On Wed, 2013-10-30 at 16:03 +0900, Atzm Watanabe wrote:
> > [...]
> >> Should it be structures as below?
> >>
> >> struct tpacket_hdr_variant1 {
> >>          __u32   tp_rxhash;
> >>          __u32   tp_vlan_tci;
> >> };
> >>
> >> struct tpacket_hdr_variant2 {
> >>          __u32   tp_rxhash;
> >>          __u32   tp_vlan_tci;
> >>          __u32   tp_vlan_tpid;
> >> };
> >>
> >> struct tpacket3_hdr {
> >>          __u32           tp_next_offset;
> >>          __u32           tp_sec;
> >>          __u32           tp_nsec;
> >>          __u32           tp_snaplen;
> >>          __u32           tp_len;
> >>          __u32           tp_status;
> >>          __u16           tp_mac;
> >>          __u16           tp_net;
> >>          /* pkt_hdr variants */
> >>          union {
> >>                  struct tpacket_hdr_variant1 hv1;
> >>                  struct tpacket_hdr_variant2 hv2;
> >>          };
> >> };
> >>
> >> If it's ok, I'd like to send the patch v2.
> > [...]
> >
> > I think this makes sense.
> >
> > Also add a BUILD_BUG_ON(TPACKET3_HDRLEN != 48) somewhere.
> 
> Sorry for coming late into this discussion.
> 
> I think the packet_mmap API with its 3 different API versions
> already is a "bit" terrible, and should only be further extended
> for user space if there is a _*huge*_ (!) improvement somewhere
> (e.g. in terms of packet capturing/transmission performance) that
> would justify it. I'm thinking that this could e.g. be in terms
> of packet capturing and transmission performance.
> 
> Otherwise, each time we want to add a new member, we add yet
> another subheader for userspace into tpacket, making it even
> more complicated to use, and adding more "legacy" API fragments?

Thank you for the comments.

Indeed, your worry will become reality if we often want to add new
members as other aims, but I also think that VLAN related members
perhaps won't be added anymore because the VLAN only contains TCI
and ethertype.


> To solve your problem, why don't you add a socket option that,
> if _enabled_, would reconstruct the vlan header as it was seen
> on the "wire" and push that up to userspace, just like libpcap
> does internally. It's not perfect either, but perhaps a better
> way to go. What do you think?

Sounds good to me, but I also think that a socket option should be
added when we want to add new members to enable userspace to
reassemble the pakcet for reasons other than the VLAN.  Because TCI
is already put into the tpacket headers or auxdata, userspace may
want to get TPID by same way.

However, if we cannot add new members into the header anyway,
we need to avoid that by other ways like you proposed.


Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index dbf0666..6e36e0a 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -83,7 +83,7 @@  struct tpacket_auxdata {
 	__u16		tp_mac;
 	__u16		tp_net;
 	__u16		tp_vlan_tci;
-	__u16		tp_padding;
+	__u16		tp_vlan_tpid;
 };
 
 /* Rx ring - header status */
@@ -132,12 +132,13 @@  struct tpacket2_hdr {
 	__u32		tp_sec;
 	__u32		tp_nsec;
 	__u16		tp_vlan_tci;
-	__u16		tp_padding;
+	__u16		tp_vlan_tpid;
 };
 
 struct tpacket_hdr_variant1 {
 	__u32	tp_rxhash;
 	__u32	tp_vlan_tci;
+	__u32	tp_vlan_tpid;
 };
 
 struct tpacket3_hdr {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2e8286b..fbcc882 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -895,9 +895,11 @@  static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
 {
 	if (vlan_tx_tag_present(pkc->skb)) {
 		ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
+		ppd->hv1.tp_vlan_tpid = (__force __u32)ntohs(pkc->skb->vlan_proto);
 		ppd->tp_status = TP_STATUS_VLAN_VALID;
 	} else {
 		ppd->hv1.tp_vlan_tci = 0;
+		ppd->hv1.tp_vlan_tpid = 0;
 		ppd->tp_status = TP_STATUS_AVAILABLE;
 	}
 }
@@ -1836,11 +1838,12 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h2->tp_nsec = ts.tv_nsec;
 		if (vlan_tx_tag_present(skb)) {
 			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
 			status |= TP_STATUS_VLAN_VALID;
 		} else {
 			h.h2->tp_vlan_tci = 0;
+			h.h2->tp_vlan_tpid = 0;
 		}
-		h.h2->tp_padding = 0;
 		hdrlen = sizeof(*h.h2);
 		break;
 	case TPACKET_V3:
@@ -2788,11 +2791,12 @@  static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_net = skb_network_offset(skb);
 		if (vlan_tx_tag_present(skb)) {
 			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+			aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
 			aux.tp_status |= TP_STATUS_VLAN_VALID;
 		} else {
 			aux.tp_vlan_tci = 0;
+			aux.tp_vlan_tpid = 0;
 		}
-		aux.tp_padding = 0;
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}