diff mbox

[net-next,7/7] net: move skb->dropcount to skb->cb[]

Message ID 1424916612-744-8-git-send-email-eyal.birger@gmail.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eyal Birger Feb. 26, 2015, 2:10 a.m. UTC
Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
unionized skb->mark and skb->dropcount in order to allow recording
of the socket drop count while maintaining struct sk_buff size.

skb->dropcount was introduced since there was no available room
in skb->cb[] in packet sockets. However, its introduction led to
the inability to export skb->mark, or any other aliased field to
userspace if so desired.

Moving the dropcount metric to skb->cb[] eliminates this problem
at the expense of 4 bytes less in skb->cb[] for protocol families
using it.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/linux/skbuff.h |  2 --
 include/net/sock.h     | 18 ++++++++++++++++--
 net/socket.c           |  4 ++--
 3 files changed, 18 insertions(+), 6 deletions(-)

Comments

Shmulik Ladkani Feb. 26, 2015, 8:49 a.m. UTC | #1
On Thu, 26 Feb 2015 04:10:12 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> diff --git a/include/net/sock.h b/include/net/sock.h
> index d462f5e..8807586 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
>  	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
>  }
>  
> +struct sock_skb_cb {
> +	u32 dropcount;
> +};
> +
> +/* Store sock_skb_cb at the end of skb->cb[] so protocol families
> + * using skb->cb[] would keep using it directly and utilize its
> + * alignement guarantee.
> + */
> +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
> +			    sizeof(struct sock_skb_cb)))
> +
> +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
> +			    SOCK_SKB_CB_OFFSET))
> +
>  #define sock_skb_cb_check_size(size) \
> -	BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
> +	BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
>  

You didn't take care of aligning the 'sock_skb_cb'.
(Althogh in practive, dropcount is 4 and cb[] is 48 so you're golden).

How about:

struct sock_skb_cb {
	/* protocol families specifc CBs */
	u8 reserved[__SOCK_SKB_CB_OFFSET];
	struct __sock_skb_cb x;			// name me better!
};

Where '__SOCK_SKB_CB_OFFSET' and '__sock_skb_cb' are as follows:

struct __sock_skb_cb {	// name me better!
	u32 dropcount;
};

#define __SOCK_SKB_CB_OFFSET_UNALIGNED \
	(FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct __sock_skb_cb))

#define __SOCK_SKB_CB_OFFSET \
	(__SOCK_SKB_CB_OFFSET_UNALIGNED - \
	 (__SOCK_SKB_CB_OFFSET_UNALIGNED % __alignof__(struct __sock_skb_cb)))

This also takes care for alignement of '__sock_skb_cb x' within the CB.

Then,

#define sock_skb_cb_check_size(size) \
	BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sock_skb_cb, reserved))

Regards,
Shmulik
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eyal Birger Feb. 26, 2015, 10:17 a.m. UTC | #2
On Thu, Feb 26, 2015 at 10:49 AM, Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
> On Thu, 26 Feb 2015 04:10:12 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index d462f5e..8807586 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
>>       return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
>>  }
>>
>> +struct sock_skb_cb {
>> +     u32 dropcount;
>> +};
>> +
>> +/* Store sock_skb_cb at the end of skb->cb[] so protocol families
>> + * using skb->cb[] would keep using it directly and utilize its
>> + * alignement guarantee.
>> + */
>> +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
>> +                         sizeof(struct sock_skb_cb)))
>> +
>> +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
>> +                         SOCK_SKB_CB_OFFSET))
>> +
>>  #define sock_skb_cb_check_size(size) \
>> -     BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
>> +     BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
>>
>
> You didn't take care of aligning the 'sock_skb_cb'.
> (Althogh in practive, dropcount is 4 and cb[] is 48 so you're golden).
>
> How about:
>
> struct sock_skb_cb {
>         /* protocol families specifc CBs */
>         u8 reserved[__SOCK_SKB_CB_OFFSET];
>         struct __sock_skb_cb x;                 // name me better!
> };
>
> Where '__SOCK_SKB_CB_OFFSET' and '__sock_skb_cb' are as follows:
>
> struct __sock_skb_cb {  // name me better!
>         u32 dropcount;
> };
>
> #define __SOCK_SKB_CB_OFFSET_UNALIGNED \
>         (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct __sock_skb_cb))
>
> #define __SOCK_SKB_CB_OFFSET \
>         (__SOCK_SKB_CB_OFFSET_UNALIGNED - \
>          (__SOCK_SKB_CB_OFFSET_UNALIGNED % __alignof__(struct __sock_skb_cb)))
>
> This also takes care for alignement of '__sock_skb_cb x' within the CB.
>
> Then,
>
> #define sock_skb_cb_check_size(size) \
>         BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sock_skb_cb, reserved))
>

Agreed. Will add to v2.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eyal Birger Feb. 26, 2015, 10:48 a.m. UTC | #3
On Thu, Feb 26, 2015 at 12:17 PM, Eyal Birger <eyal.birger@gmail.com> wrote:
> On Thu, Feb 26, 2015 at 10:49 AM, Shmulik Ladkani
> <shmulik.ladkani@gmail.com> wrote:
>> On Thu, 26 Feb 2015 04:10:12 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>>> diff --git a/include/net/sock.h b/include/net/sock.h
>>> index d462f5e..8807586 100644
>>> --- a/include/net/sock.h
>>> +++ b/include/net/sock.h
>>> @@ -2078,12 +2078,26 @@ static inline int sock_intr_errno(long timeo)
>>>       return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
>>>  }
>>>
>>> +struct sock_skb_cb {
>>> +     u32 dropcount;
>>> +};
>>> +
>>> +/* Store sock_skb_cb at the end of skb->cb[] so protocol families
>>> + * using skb->cb[] would keep using it directly and utilize its
>>> + * alignement guarantee.
>>> + */
>>> +#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
>>> +                         sizeof(struct sock_skb_cb)))
>>> +
>>> +#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
>>> +                         SOCK_SKB_CB_OFFSET))
>>> +
>>>  #define sock_skb_cb_check_size(size) \
>>> -     BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
>>> +     BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
>>>
>>
>> You didn't take care of aligning the 'sock_skb_cb'.
>> (Althogh in practive, dropcount is 4 and cb[] is 48 so you're golden).
>>
>> How about:
>>
>> struct sock_skb_cb {
>>         /* protocol families specifc CBs */
>>         u8 reserved[__SOCK_SKB_CB_OFFSET];
>>         struct __sock_skb_cb x;                 // name me better!
>> };
>>
>> Where '__SOCK_SKB_CB_OFFSET' and '__sock_skb_cb' are as follows:
>>
>> struct __sock_skb_cb {  // name me better!
>>         u32 dropcount;
>> };
>>
>> #define __SOCK_SKB_CB_OFFSET_UNALIGNED \
>>         (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct __sock_skb_cb))
>>
>> #define __SOCK_SKB_CB_OFFSET \
>>         (__SOCK_SKB_CB_OFFSET_UNALIGNED - \
>>          (__SOCK_SKB_CB_OFFSET_UNALIGNED % __alignof__(struct __sock_skb_cb)))
>>
>> This also takes care for alignement of '__sock_skb_cb x' within the CB.
>>
>> Then,
>>
>> #define sock_skb_cb_check_size(size) \
>>         BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sock_skb_cb, reserved))
>>
>
> Agreed. Will add to v2.

After giving it some additional thought and research, I don't think
this is necessary.

The sizeof operator on struct sock_skb_cb would give a padded result
which would take care of the structure alignment.

This is under the assumption that skb->cb[] size is of a proper multiple (which
I think is rather safe).

Objections?
Eyal.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shmulik Ladkani Feb. 26, 2015, 3:32 p.m. UTC | #4
On Thu, 26 Feb 2015 12:48:14 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> After giving it some additional thought and research, I don't think
> this is necessary.
> 
> The sizeof operator on struct sock_skb_cb would give a padded result
> which would take care of the structure alignment.
> 
> This is under the assumption that skb->cb[] size is of a proper multiple (which
> I think is rather safe).

Yes, you are correct.

With the assumption that skb->cb and 'cb' size are properly aligned and
sized (compile time asserted, btw?), you can simply:

struct your_thing {     // name it
	u32 dropcount;
};

struct sock_skb_cb {
	/* protocol families specifc CBs */
	u8 pf_reserved[PAD];
	struct your_thing sock_cb;
};

where:

#define PAD (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct your_thing))

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eyal Birger Feb. 26, 2015, 6:52 p.m. UTC | #5
On Thu, Feb 26, 2015 at 5:32 PM, Shmulik Ladkani
<shmulik.ladkani@gmail.com> wrote:
> On Thu, 26 Feb 2015 12:48:14 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>> After giving it some additional thought and research, I don't think
>> this is necessary.
>>
>> The sizeof operator on struct sock_skb_cb would give a padded result
>> which would take care of the structure alignment.
>>
>> This is under the assumption that skb->cb[] size is of a proper multiple (which
>> I think is rather safe).
>
> Yes, you are correct.
>
> With the assumption that skb->cb and 'cb' size are properly aligned and
> sized (compile time asserted, btw?), you can simply:
>
> struct your_thing {     // name it
>         u32 dropcount;
> };
>
> struct sock_skb_cb {
>         /* protocol families specifc CBs */
>         u8 pf_reserved[PAD];
>         struct your_thing sock_cb;
> };
>
> where:
>
> #define PAD (FIELD_SIZEOF(struct sk_buff, cb) - sizeof(struct your_thing))
>

struct sock_skb_cb as I see it represents the sock generic metadata
required in the skb->cb[] and its placement at the end of skb->cb[]
is an implementation detail - so it, in my view, is what you referred to as
"struct your_thing".

Coding-wise I would rather avoid the additional enclosing struct and stick
with the proposed implementation.

Eyal.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d898b32..bba1330 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -492,7 +492,6 @@  static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
- *	@dropcount: total number of sk_receive_queue overflows
  *	@vlan_proto: vlan encapsulation protocol
  *	@vlan_tci: vlan tag control information
  *	@inner_protocol: Protocol (encapsulation)
@@ -641,7 +640,6 @@  struct sk_buff {
 #endif
 	union {
 		__u32		mark;
-		__u32		dropcount;
 		__u32		reserved_tailroom;
 	};
 
diff --git a/include/net/sock.h b/include/net/sock.h
index d462f5e..8807586 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2078,12 +2078,26 @@  static inline int sock_intr_errno(long timeo)
 	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
 }
 
+struct sock_skb_cb {
+	u32 dropcount;
+};
+
+/* Store sock_skb_cb at the end of skb->cb[] so protocol families
+ * using skb->cb[] would keep using it directly and utilize its
+ * alignement guarantee.
+ */
+#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
+			    sizeof(struct sock_skb_cb)))
+
+#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
+			    SOCK_SKB_CB_OFFSET))
+
 #define sock_skb_cb_check_size(size) \
-	BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
+	BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
 
 static inline void sock_skb_set_dropcount(struct sock *sk, struct sk_buff *skb)
 {
-	skb->dropcount = atomic_read(&sk->sk_drops);
+	SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
 }
 
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
diff --git a/net/socket.c b/net/socket.c
index bbedbfc..b78cf60 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -731,9 +731,9 @@  EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
 static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 				   struct sk_buff *skb)
 {
-	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
 		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
-			sizeof(__u32), &skb->dropcount);
+			sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
 }
 
 void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,