diff mbox

[v2] netlink: align attributes on 64-bits

Message ID 20121218125735.GG27746@casper.infradead.org
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Thomas Graf Dec. 18, 2012, 12:57 p.m. UTC
On 12/17/12 at 05:49pm, Nicolas Dichtel wrote:
> @@ -492,6 +492,15 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
>   */
>  static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
>  {
> +	/* Because attributes may be aligned on 64-bits boundary with fake
> +	 * attribute (type 0, size 4 (attributes are 32-bits align by default)),
> +	 * an exact payload size cannot be calculated. Hence, we need to reserve
> +	 * more space for these attributes.
> +	 * 128 is arbitrary: it allows to align up to 32 attributes.
> +	 */
> +	if (payload < NLMSG_DEFAULT_SIZE)
> +		payload = min(payload + 128, (size_t)NLMSG_DEFAULT_SIZE);

This is doomed to fail eventually. A netlink message may carry
hundreds of attributes eventually. See my suggestion below.

> diff --git a/lib/nlattr.c b/lib/nlattr.c
> index 18eca78..7440a80 100644
> --- a/lib/nlattr.c
> +++ b/lib/nlattr.c
> @@ -450,9 +450,18 @@ EXPORT_SYMBOL(__nla_put_nohdr);
>   */
>  int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
>  {
> -	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
> +	int align = IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8) ? 0 : 4;

This does not look right. In order for the attribute data to be
aligned properly you would need to check skb_tail_pointer(skb) +
NLA_HDRLEN for proper alignment or you end up aligning the
attribute header.

How about we change nla_total_size() to return the size with
needed padding taken into account. That should fix the message
size caluclation problem and we only need to reserve room for
the initial padding to align the very first attribute.

Below is an untested patch that does this. What do you think?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Nicolas Dichtel Dec. 18, 2012, 4:23 p.m. UTC | #1
Le 18/12/2012 13:57, Thomas Graf a écrit :
> On 12/17/12 at 05:49pm, Nicolas Dichtel wrote:
>> @@ -492,6 +492,15 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
>>    */
>>   static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
>>   {
>> +	/* Because attributes may be aligned on 64-bits boundary with fake
>> +	 * attribute (type 0, size 4 (attributes are 32-bits align by default)),
>> +	 * an exact payload size cannot be calculated. Hence, we need to reserve
>> +	 * more space for these attributes.
>> +	 * 128 is arbitrary: it allows to align up to 32 attributes.
>> +	 */
>> +	if (payload < NLMSG_DEFAULT_SIZE)
>> +		payload = min(payload + 128, (size_t)NLMSG_DEFAULT_SIZE);
>
> This is doomed to fail eventually. A netlink message may carry
> hundreds of attributes eventually. See my suggestion below.
>
>> diff --git a/lib/nlattr.c b/lib/nlattr.c
>> index 18eca78..7440a80 100644
>> --- a/lib/nlattr.c
>> +++ b/lib/nlattr.c
>> @@ -450,9 +450,18 @@ EXPORT_SYMBOL(__nla_put_nohdr);
>>    */
>>   int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
>>   {
>> -	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
>> +	int align = IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8) ? 0 : 4;
>
> This does not look right. In order for the attribute data to be
> aligned properly you would need to check skb_tail_pointer(skb) +
> NLA_HDRLEN for proper alignment or you end up aligning the
> attribute header.
Good point.

>
> How about we change nla_total_size() to return the size with
> needed padding taken into account. That should fix the message
> size caluclation problem and we only need to reserve room for
> the initial padding to align the very first attribute.
>
> Below is an untested patch that does this. What do you think?
I still have some doubts about the size calculation (see bellow).
For the rest of the patch, it seems ok (except some minor point). I will test it.

>
> diff --git a/include/net/netlink.h b/include/net/netlink.h
> index 9690b0f..7ce8e76 100644
> --- a/include/net/netlink.h
> +++ b/include/net/netlink.h
> @@ -114,7 +114,6 @@
>    * Attribute Length Calculations:
>    *   nla_attr_size(payload)		length of attribute w/o padding
>    *   nla_total_size(payload)		length of attribute w/ padding
> - *   nla_padlen(payload)		length of padding
>    *
>    * Attribute Payload Access:
>    *   nla_data(nla)			head of attribute payload
> @@ -492,6 +491,14 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
>    */
>   static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
>   {
> +	/* If an exact size if specified, reserve some additional space to
> +	 * align the first attribute, all subsequent attributes should have
> +	 * padding accounted for.
> +	 */
> +	if (payload != NLMSG_DEFAULT_SIZE)
> +		payload = min_t(size_t, payload + NLA_ATTR_ALIGN,
> +				NLMSG_DEFAULT_SIZE);
> +
>   	return alloc_skb(nlmsg_total_size(payload), flags);
>   }
>
> @@ -653,16 +660,12 @@ static inline int nla_attr_size(int payload)
>    */
>   static inline int nla_total_size(int payload)
>   {
> -	return NLA_ALIGN(nla_attr_size(payload));
> -}
> +	size_t len = NLA_ALIGN(nla_attr_size(payload));
>
> -/**
> - * nla_padlen - length of padding at the tail of attribute
> - * @payload: length of payload
> - */
> -static inline int nla_padlen(int payload)
> -{
> -	return nla_total_size(payload) - nla_attr_size(payload);
> +	if (!IS_ALIGNED(len, NLA_ATTR_ALIGN))
> +		len = ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN);
Two comments:
1/ should it be ALIGN(len, NLA_ATTR_ALIGN)? If we want to add a __u64:
    => nla_attr_size(sizeof(__u64)) = 12
    => NLA_ALIGN(nla_attr_size(sizeof(__u64))) => 12 (= len)
    => ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN) = 0 but it should be 4

2/ Suppose that the attribute is:

   struct foo {
   	__u64 bar1;
   	__u32 bar2;
   }
   => sizeof(struct foo) = 12 (= payload)
   => nla_attr_size(payload) = 16
   => NLA_ALIGN(nla_attr_size(payload)) = 16 (= len)
   => IS_ALIGNED(len, NLA_ATTR_ALIGN) = true
   => extra room is not reserved
   But it's not guaranteed that bar1 is aligned on 8 bytes, only on 4 bytes.

> +
> +	return len;
>   }
>
>   /**
> diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
> index 78d5b8a..1856729 100644
> --- a/include/uapi/linux/netlink.h
> +++ b/include/uapi/linux/netlink.h
> @@ -149,5 +149,10 @@ struct nlattr {
>   #define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
>   #define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
>
> +/* Padding attribute type, added automatically to align attributes,
> + * must be ignored by readers. */
> +#define NLA_PADDING		0
> +#define NLA_ATTR_ALIGN		8
> +
>
>   #endif /* _UAPI__LINUX_NETLINK_H */
> diff --git a/lib/nlattr.c b/lib/nlattr.c
> index 18eca78..b09473c 100644
> --- a/lib/nlattr.c
> +++ b/lib/nlattr.c
> @@ -322,18 +322,36 @@ int nla_strcmp(const struct nlattr *nla, const char *str)
>    * Adds a netlink attribute header to a socket buffer and reserves
>    * room for the payload but does not copy it.
>    *
> + * May add a padding attribute of type NLA_PADDING before the
> + * real attribute to ensure proper alignment.
> + *
>    * The caller is responsible to ensure that the skb provides enough
>    * tailroom for the attribute header and payload.
>    */
>   struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
>   {
>   	struct nlattr *nla;
> +	size_t offset;
> +
> +	offset = (size_t) skb_tail_pointer(skb);
> +	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN)) {
With the previous struct foo, this test may be true even if we don't have 
reserved extra room. This test depends on previous attribute.
I think the exact size of the netlink message depends on the order of 
attributes, not only on the attribute itself.
What about taking the assumption that the start will never be aligned and always 
allocating extra room: ALIGN(NLA_ALIGNTO, NLA_ATTR_ALIGN) (= 4)?

> +		struct nlattr *pad;
> +		size_t padlen;
> +
> +		padlen = nla_total_size(offset) - offset - NLA_HDRLEN;
> +		pad = (struct nlattr *) skb_put(skb, nla_attr_size(padlen));
> +		pad->nla_type = 0;
> +		pad->nla_len = nla_attr_size(padlen);
> +
> +		memset((unsigned char *) pad + NLA_HDRLEN, 0, padlen);
> +	}
>
> -	nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
> +	nla = (struct nlattr *) skb_put(skb, NLA_ALIGN(nla_attr_size(attrlen)));
>   	nla->nla_type = attrtype;
>   	nla->nla_len = nla_attr_size(attrlen);
>
> -	memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
> +	memset((unsigned char *) nla + nla->nla_len, 0,
> +	       NLA_ALIGN(nla->nla_len) - nla->nla_len);
>
>   	return nla;
>   }
> @@ -360,6 +378,23 @@ void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
>   }
>   EXPORT_SYMBOL(__nla_reserve_nohdr);
>
> +static size_t nla_pre_padlen(struct sk_buff *skb)
> +{
> +	size_t offset = (size_t) skb_tail_pointer(skb);
> +
> +	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN))
> +		return nla_total_size(offset) - offset - NLA_HDRLEN;
> +
> +	return 0;
> +}
> +
> +static bool nla_insufficient_space(struct sk_buff *skb, int attrlen)
> +{
> +	size_t needed = nla_pre_padlen(skb) + nla_total_size(attrlen);
If nla_total_size() was right, nla_pre_padlen(skb) should already be included. 
Am I wrong?

> +
> +	return (skb_tailroom(skb) < needed);
> +}
> +
>   /**
>    * nla_reserve - reserve room for attribute on the skb
>    * @skb: socket buffer to reserve room on
> @@ -374,7 +409,7 @@ EXPORT_SYMBOL(__nla_reserve_nohdr);
>    */
>   struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
>   {
> -	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
> +	if (unlikely(nla_insufficient_space(skb, attrlen)))
>   		return NULL;
>
>   	return __nla_reserve(skb, attrtype, attrlen);
> @@ -450,7 +485,7 @@ EXPORT_SYMBOL(__nla_put_nohdr);
>    */
>   int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
>   {
> -	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
> +	if (unlikely(nla_insufficient_space(skb, attrlen)))
>   		return -EMSGSIZE;
>
>   	__nla_put(skb, attrtype, attrlen, data);
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight Dec. 18, 2012, 4:50 p.m. UTC | #2
> 2/ Suppose that the attribute is:
> 
>    struct foo {
>    	__u64 bar1;
>    	__u32 bar2;
>    }
>    => sizeof(struct foo) = 12 (= payload)

That is only true if the host architecture aligns 64bit items
on 32 it boundaries (as i386 does).
Otherwise there are 4 bytes of padding at the end and the
size is 16.

Actually it is worse than that.
Consider the structure:
	struct bar {
		__u32 foo1;
		__u64 foo2;
	}
On i386 it will have size 12 and foo2 will be at offset 4.
On sparc32 (and most 64bit) it will have size 16 with foo2
at offset 8 (and 4 bytes of pad after foo1).

Do these messages move between systems?
If they do then any 64bit items need an explicit alignment
eg tag with __attribute__((aligned(8))) (or aligned(4)).

	David



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Graf Dec. 18, 2012, 5:08 p.m. UTC | #3
On 12/18/12 at 05:23pm, Nicolas Dichtel wrote:
> Le 18/12/2012 13:57, Thomas Graf a écrit :
> >-static inline int nla_padlen(int payload)
> >-{
> >-	return nla_total_size(payload) - nla_attr_size(payload);
> >+	if (!IS_ALIGNED(len, NLA_ATTR_ALIGN))
> >+		len = ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN);
> Two comments:
> 1/ should it be ALIGN(len, NLA_ATTR_ALIGN)? If we want to add a __u64:
>    => nla_attr_size(sizeof(__u64)) = 12
>    => NLA_ALIGN(nla_attr_size(sizeof(__u64))) => 12 (= len)
>    => ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN) = 0 but it should be 4

We can't add 1-3 bytes of padding, therefore we need to add
NLA_HDRLEN to len before aligning it to enforce a minimal
padding. We can't hit it right now because 4 byte alignment
of the previous attribute is a given but if we ever change
the alignment it could become an issue and the above should
be bullet proof.

Your example would come out like this:
  nla_attr_size(8) = 12
  ALIGN(12 + 4, 8) = 16

> 2/ Suppose that the attribute is:
> 
>   struct foo {
>   	__u64 bar1;
>   	__u32 bar2;
>   }
>   => sizeof(struct foo) = 12 (= payload)
>   => nla_attr_size(payload) = 16
>   => NLA_ALIGN(nla_attr_size(payload)) = 16 (= len)
>   => IS_ALIGNED(len, NLA_ATTR_ALIGN) = true
>   => extra room is not reserved
>   But it's not guaranteed that bar1 is aligned on 8 bytes, only on 4 bytes.

That's correct, that's why I have added the additional
NLA_ATTR_ALIGN of room in nlmsg_new(). It will account
for the one time padding that is needed before we add
the very first attribute.

If all attributes after that have a size aligned to 8
bytes no padding is needed. Padding will only be needed
again if a struct is missized in which case we reserve
room with the above. Correct?

> >+	offset = (size_t) skb_tail_pointer(skb);
> >+	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN)) {
> With the previous struct foo, this test may be true even if we don't
> have reserved extra room. This test depends on previous attribute.
> I think the exact size of the netlink message depends on the order
> of attributes, not only on the attribute itself.
> What about taking the assumption that the start will never be
> aligned and always allocating extra room: ALIGN(NLA_ALIGNTO,
> NLA_ATTR_ALIGN) (= 4)?

See my explanation above. I think this works. The order does not
matter, the sum of all padding required will always be the same.

> >+static bool nla_insufficient_space(struct sk_buff *skb, int attrlen)
> >+{
> >+	size_t needed = nla_pre_padlen(skb) + nla_total_size(attrlen);
> If nla_total_size() was right, nla_pre_padlen(skb) should already be
> included. Am I wrong?

No, nla_pre_padlen() contains the number of bytes needed to align
skb_tail_pointer() to an alignment of 8. If that is > 0 but the
attribute to follow is already aligned.

The tricky part here is that accounting for padding in
nla_total_size() only works for the sum of all attributes.
It does not account for the specific padding required for the
previous attribute.

Therefore the above check. The above could be changed to
nla_attr_size() theoretically as we don't need space for the
final padding eventually but we checked for space before so I
kept it that way.

I realize it's slightly confusign and needs better documentation
and please double check my thinking :-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Graf Dec. 18, 2012, 5:11 p.m. UTC | #4
On 12/18/12 at 04:50pm, David Laight wrote:
> > 2/ Suppose that the attribute is:
> > 
> >    struct foo {
> >    	__u64 bar1;
> >    	__u32 bar2;
> >    }
> >    => sizeof(struct foo) = 12 (= payload)
> 
> That is only true if the host architecture aligns 64bit items
> on 32 it boundaries (as i386 does).
> Otherwise there are 4 bytes of padding at the end and the
> size is 16.
> 
> Actually it is worse than that.
> Consider the structure:
> 	struct bar {
> 		__u32 foo1;
> 		__u64 foo2;
> 	}
> On i386 it will have size 12 and foo2 will be at offset 4.
> On sparc32 (and most 64bit) it will have size 16 with foo2
> at offset 8 (and 4 bytes of pad after foo1).

This is a known problem and I can't think of anything
that can be done about it except for memcpy()ing the
data before accessing it.

If you have ideas, I'm more that willing to listen :)

> Do these messages move between systems?
> If they do then any 64bit items need an explicit alignment
> eg tag with __attribute__((aligned(8))) (or aligned(4)).

They don't. Netlink has and will be host bound. It also
uses host byte order for that reason.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Dichtel Dec. 18, 2012, 10:07 p.m. UTC | #5
Le 18/12/2012 18:08, Thomas Graf a écrit :
> On 12/18/12 at 05:23pm, Nicolas Dichtel wrote:
>> Le 18/12/2012 13:57, Thomas Graf a écrit :
>>> -static inline int nla_padlen(int payload)
>>> -{
>>> -	return nla_total_size(payload) - nla_attr_size(payload);
>>> +	if (!IS_ALIGNED(len, NLA_ATTR_ALIGN))
>>> +		len = ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN);
>> Two comments:
>> 1/ should it be ALIGN(len, NLA_ATTR_ALIGN)? If we want to add a __u64:
>>     => nla_attr_size(sizeof(__u64)) = 12
>>     => NLA_ALIGN(nla_attr_size(sizeof(__u64))) => 12 (= len)
>>     => ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN) = 0 but it should be 4
>
> We can't add 1-3 bytes of padding, therefore we need to add
> NLA_HDRLEN to len before aligning it to enforce a minimal
> padding. We can't hit it right now because 4 byte alignment
> of the previous attribute is a given but if we ever change
> the alignment it could become an issue and the above should
> be bullet proof.
>
> Your example would come out like this:
>    nla_attr_size(8) = 12
>    ALIGN(12 + 4, 8) = 16
Got it, right.

>
>> 2/ Suppose that the attribute is:
>>
>>    struct foo {
>>    	__u64 bar1;
>>    	__u32 bar2;
>>    }
>>    => sizeof(struct foo) = 12 (= payload)
>>    => nla_attr_size(payload) = 16
>>    => NLA_ALIGN(nla_attr_size(payload)) = 16 (= len)
>>    => IS_ALIGNED(len, NLA_ATTR_ALIGN) = true
>>    => extra room is not reserved
>>    But it's not guaranteed that bar1 is aligned on 8 bytes, only on 4 bytes.
>
> That's correct, that's why I have added the additional
> NLA_ATTR_ALIGN of room in nlmsg_new(). It will account
> for the one time padding that is needed before we add
> the very first attribute.
>
> If all attributes after that have a size aligned to 8
> bytes no padding is needed. Padding will only be needed
> again if a struct is missized in which case we reserve
> room with the above. Correct?
Seems good ;-)

>
>>> +	offset = (size_t) skb_tail_pointer(skb);
>>> +	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN)) {
>> With the previous struct foo, this test may be true even if we don't
>> have reserved extra room. This test depends on previous attribute.
>> I think the exact size of the netlink message depends on the order
>> of attributes, not only on the attribute itself.
>> What about taking the assumption that the start will never be
>> aligned and always allocating extra room: ALIGN(NLA_ALIGNTO,
>> NLA_ATTR_ALIGN) (= 4)?
>
> See my explanation above. I think this works. The order does not
> matter, the sum of all padding required will always be the same.
I will do more test.

>
>>> +static bool nla_insufficient_space(struct sk_buff *skb, int attrlen)
>>> +{
>>> +	size_t needed = nla_pre_padlen(skb) + nla_total_size(attrlen);
>> If nla_total_size() was right, nla_pre_padlen(skb) should already be
>> included. Am I wrong?
>
> No, nla_pre_padlen() contains the number of bytes needed to align
> skb_tail_pointer() to an alignment of 8. If that is > 0 but the
> attribute to follow is already aligned.
>
> The tricky part here is that accounting for padding in
> nla_total_size() only works for the sum of all attributes.
> It does not account for the specific padding required for the
> previous attribute.
>
> Therefore the above check. The above could be changed to
> nla_attr_size() theoretically as we don't need space for the
> final padding eventually but we checked for space before so I
> kept it that way.
>
> I realize it's slightly confusign and needs better documentation
> and please double check my thinking :-)
Ok, you convince me ;-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight Dec. 19, 2012, 9:17 a.m. UTC | #6
> > Consider the structure:
> > 	struct bar {
> > 		__u32 foo1;
> > 		__u64 foo2;
> > 	}
> > On i386 it will have size 12 and foo2 will be at offset 4.
> > On sparc32 (and most 64bit) it will have size 16 with foo2
> > at offset 8 (and 4 bytes of pad after foo1).
> 
> This is a known problem and I can't think of anything
> that can be done about it except for memcpy()ing the
> data before accessing it.

You can't use memcpy() to copy a pointer to a misaligned
structure into an aligned buffer. The compiler assumes
the pointer is aligned and will use instructions that
depend on the alignment.

> If you have ideas, I'm more that willing to listen :)
> ... Netlink has and will be host bound. It also
> uses host byte order for that reason.

I think:
1) Alignment is only needed on systems that have 'strict alignment'
   requirements (maybe disable for testing?)
2) Alignment is only needed for parameters whose size is a
   multiple of the alignment (a structure containing a
   field that needs 8 byte alignment will always be a multiple
   of 8 bytes long).
3) You need to add NA_HDR_LEN to the write pointer before
   determining the size of the pad.

So a structure of three uint32_t will never need aligning.

	David



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Graf Dec. 19, 2012, 5:20 p.m. UTC | #7
On 12/19/12 at 09:17am, David Laight wrote:
> You can't use memcpy() to copy a pointer to a misaligned
> structure into an aligned buffer. The compiler assumes
> the pointer is aligned and will use instructions that
> depend on the alignment.

I am not sure I understand this correctly. Are you saying
that the following does not work on i386?

struct foo {
  uint32_t a;
  uint64_t b;
};

struct foo buf;

memcpy(&buf, nla_data(attr), nla_len(attr));
printf([...], buf.b);


> I think:
> 1) Alignment is only needed on systems that have 'strict alignment'
>    requirements (maybe disable for testing?)

Right, what about mixed 32bit/64bit environments?

> 2) Alignment is only needed for parameters whose size is a
>    multiple of the alignment (a structure containing a
>    field that needs 8 byte alignment will always be a multiple
>    of 8 bytes long).

Good point. I'll fix this in the next iteration of the patch.

> 3) You need to add NA_HDR_LEN to the write pointer before
>    determining the size of the pad.

Right, I'm doing this in the patch I proposed. Or are you referring
to something else?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight Dec. 20, 2012, 9:37 a.m. UTC | #8
> On 12/19/12 at 09:17am, David Laight wrote:
> > You can't use memcpy() to copy a pointer to a misaligned
> > structure into an aligned buffer. The compiler assumes
> > the pointer is aligned and will use instructions that
> > depend on the alignment.
> 
> I am not sure I understand this correctly. Are you saying
> that the following does not work on i386?
> 
> struct foo {
>   uint32_t a;
>   uint64_t b;
> };
> 
> struct foo buf;
> 
> memcpy(&buf, nla_data(attr), nla_len(attr));
> printf([...], buf.b);

That will be fine on all systems.
But if, instead, you have:
	struct foo buf, *bufp;
	bufp = nla_data(attr);
	memcpy(&buf, bufp, sizeof buf);
The compiler is allowed to assume that 'bufp' is aligned,
so the copy will be done using 64bit accesses.

(Basically because all you are allowed to do with 'void *'
is cast a point to 'void *', then back to its original type.
So when you cast back from 'void *' the pointer can be assumed
to be aligned.)

This will fault on systems that require strict alignment
of 64bit items.

	David



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight Dec. 20, 2012, 9:40 a.m. UTC | #9
> > I think:
> > 1) Alignment is only needed on systems that have 'strict alignment'
> >    requirements (maybe disable for testing?)
> 
> Right, what about mixed 32bit/64bit environments?

Support for i386 user binaries on amd64 kernels
is an entirely different problem!
That, typically, requires the kernel to know that
the application is 32bit and use separate structures
where the 64bit items have the aligned(32) attribute.

	David



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 9690b0f..7ce8e76 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -114,7 +114,6 @@ 
  * Attribute Length Calculations:
  *   nla_attr_size(payload)		length of attribute w/o padding
  *   nla_total_size(payload)		length of attribute w/ padding
- *   nla_padlen(payload)		length of padding
  *
  * Attribute Payload Access:
  *   nla_data(nla)			head of attribute payload
@@ -492,6 +491,14 @@  static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
  */
 static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
 {
+	/* If an exact size if specified, reserve some additional space to
+	 * align the first attribute, all subsequent attributes should have
+	 * padding accounted for.
+	 */
+	if (payload != NLMSG_DEFAULT_SIZE)
+		payload = min_t(size_t, payload + NLA_ATTR_ALIGN,
+				NLMSG_DEFAULT_SIZE);
+
 	return alloc_skb(nlmsg_total_size(payload), flags);
 }
 
@@ -653,16 +660,12 @@  static inline int nla_attr_size(int payload)
  */
 static inline int nla_total_size(int payload)
 {
-	return NLA_ALIGN(nla_attr_size(payload));
-}
+	size_t len = NLA_ALIGN(nla_attr_size(payload));
 
-/**
- * nla_padlen - length of padding at the tail of attribute
- * @payload: length of payload
- */
-static inline int nla_padlen(int payload)
-{
-	return nla_total_size(payload) - nla_attr_size(payload);
+	if (!IS_ALIGNED(len, NLA_ATTR_ALIGN))
+		len = ALIGN(len + NLA_HDRLEN, NLA_ATTR_ALIGN);
+
+	return len;
 }
 
 /**
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 78d5b8a..1856729 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -149,5 +149,10 @@  struct nlattr {
 #define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
 #define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
 
+/* Padding attribute type, added automatically to align attributes,
+ * must be ignored by readers. */
+#define NLA_PADDING		0
+#define NLA_ATTR_ALIGN		8
+
 
 #endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 18eca78..b09473c 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -322,18 +322,36 @@  int nla_strcmp(const struct nlattr *nla, const char *str)
  * Adds a netlink attribute header to a socket buffer and reserves
  * room for the payload but does not copy it.
  *
+ * May add a padding attribute of type NLA_PADDING before the
+ * real attribute to ensure proper alignment.
+ *
  * The caller is responsible to ensure that the skb provides enough
  * tailroom for the attribute header and payload.
  */
 struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 {
 	struct nlattr *nla;
+	size_t offset;
+
+	offset = (size_t) skb_tail_pointer(skb);
+	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN)) {
+		struct nlattr *pad;
+		size_t padlen;
+
+		padlen = nla_total_size(offset) - offset - NLA_HDRLEN;
+		pad = (struct nlattr *) skb_put(skb, nla_attr_size(padlen));
+		pad->nla_type = 0;
+		pad->nla_len = nla_attr_size(padlen);
+
+		memset((unsigned char *) pad + NLA_HDRLEN, 0, padlen);
+	}
 
-	nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
+	nla = (struct nlattr *) skb_put(skb, NLA_ALIGN(nla_attr_size(attrlen)));
 	nla->nla_type = attrtype;
 	nla->nla_len = nla_attr_size(attrlen);
 
-	memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
+	memset((unsigned char *) nla + nla->nla_len, 0,
+	       NLA_ALIGN(nla->nla_len) - nla->nla_len);
 
 	return nla;
 }
@@ -360,6 +378,23 @@  void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
 }
 EXPORT_SYMBOL(__nla_reserve_nohdr);
 
+static size_t nla_pre_padlen(struct sk_buff *skb)
+{
+	size_t offset = (size_t) skb_tail_pointer(skb);
+
+	if (!IS_ALIGNED(offset + NLA_HDRLEN, NLA_ATTR_ALIGN))
+		return nla_total_size(offset) - offset - NLA_HDRLEN;
+
+	return 0;
+}
+
+static bool nla_insufficient_space(struct sk_buff *skb, int attrlen)
+{
+	size_t needed = nla_pre_padlen(skb) + nla_total_size(attrlen);
+
+	return (skb_tailroom(skb) < needed);
+}
+
 /**
  * nla_reserve - reserve room for attribute on the skb
  * @skb: socket buffer to reserve room on
@@ -374,7 +409,7 @@  EXPORT_SYMBOL(__nla_reserve_nohdr);
  */
 struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 {
-	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
+	if (unlikely(nla_insufficient_space(skb, attrlen)))
 		return NULL;
 
 	return __nla_reserve(skb, attrtype, attrlen);
@@ -450,7 +485,7 @@  EXPORT_SYMBOL(__nla_put_nohdr);
  */
 int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
 {
-	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
+	if (unlikely(nla_insufficient_space(skb, attrlen)))
 		return -EMSGSIZE;
 
 	__nla_put(skb, attrtype, attrlen, data);