[net-next,3/6] flow_dissector: Add hash_extra field to flow_keys struct

Message ID: 1425093109-1077-4-git-send-email-therbert@google.com
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

Tom Herbert Feb. 28, 2015, 3:11 a.m. UTC
This will be used for additional input into the hash computation,
such as a VLAN ID or GRE keyid.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/net/flow_keys.h   | 1 +
 include/net/sch_generic.h | 2 +-
 net/core/flow_dissector.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

Comments

Eric Dumazet Feb. 28, 2015, 7:37 a.m. UTC | #1
On Fri, 2015-02-27 at 19:11 -0800, Tom Herbert wrote:

> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index c605d30..d41a034 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -252,7 +252,7 @@ struct qdisc_skb_cb {
>  	unsigned int		pkt_len;
>  	u16			slave_dev_queue_mapping;
>  	u16			_pad;
> -#define QDISC_CB_PRIV_LEN 20
> +#define QDISC_CB_PRIV_LEN 24
>  	unsigned char		data[QDISC_CB_PRIV_LEN];
>  };
>  

This change breaks the kernel build: we are already at the cb[] limit.

Please check commit 257117862634d89de33fec74858b1a0ba5ab444b
("net: sched: shrink struct qdisc_skb_cb to 28 bytes")


                 from drivers/infiniband/ulp/ipoib/ipoib_main.c:35:
In function ‘ipoib_skb_cb’,
    inlined from ‘ipoib_hard_header’ at drivers/infiniband/ulp/ipoib/ipoib_main.c:816:19:
include/linux/compiler.h:424:20: error: call to ‘__compiletime_assert_136’ declared with attribute error: BUILD_BUG_ON failed: sizeof(skb->cb) < sizeof(struct ipoib_cb)
    prefix ## suffix();    \
                    ^
include/linux/compiler.h:429:2: note: in expansion of macro ‘__compiletime_assert’
  __compiletime_assert(condition, msg, prefix, suffix)
  ^
include/linux/compiler.h:441:2: note: in expansion of macro ‘_compiletime_assert’
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  ^
include/linux/bug.h:50:37: note: in expansion of macro ‘compiletime_assert’
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^
include/linux/bug.h:74:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  ^
drivers/infiniband/ulp/ipoib/ipoib.h:136:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
  ^
  CC      drivers/parport/ieee1284.o
  CC      drivers/net/arcnet/arc-rawmode.o
  CC      drivers/misc/c2port/core.o
  CC      drivers/mfd/rtsx_pcr.o
In function ‘ipoib_skb_cb’,
    inlined from ‘ipoib_start_xmit’ at drivers/infiniband/ulp/ipoib/ipoib_main.c:719:19:
include/linux/compiler.h:424:20: error: call to ‘__compiletime_assert_136’ declared with attribute error: BUILD_BUG_ON failed: sizeof(skb->cb) < sizeof(struct ipoib_cb)
    prefix ## suffix();    \
                    ^
include/linux/compiler.h:429:2: note: in expansion of macro ‘__compiletime_assert’
  __compiletime_assert(condition, msg, prefix, suffix)
  ^
include/linux/compiler.h:441:2: note: in expansion of macro ‘_compiletime_assert’
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  ^
include/linux/bug.h:50:37: note: in expansion of macro ‘compiletime_assert’
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^
include/linux/bug.h:74:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  ^
drivers/infiniband/ulp/ipoib/ipoib.h:136:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
  ^
make[4]: *** [drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1
make[3]: *** [drivers/infiniband/ulp/ipoib] Error 2
make[2]: *** [drivers/infiniband/ulp] Error 2
make[1]: *** [drivers/infiniband] Error 2
make[1]: *** Waiting for unfinished jobs....
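
For reference, the arithmetic behind that assertion (a sketch; it
assumes sizeof(skb->cb) == 48 and INFINIBAND_ALEN == 20, which is what
the ipoib check relies on):

/*
 * sizeof(struct qdisc_skb_cb) = 4 (pkt_len) + 2 + 2 + QDISC_CB_PRIV_LEN
 *
 *     QDISC_CB_PRIV_LEN = 20  ->  28 bytes
 *     QDISC_CB_PRIV_LEN = 24  ->  32 bytes
 *
 * struct ipoib_cb wraps qdisc_skb_cb plus a hardware address:
 *
 * sizeof(struct ipoib_cb) = sizeof(struct qdisc_skb_cb) + INFINIBAND_ALEN
 *
 *     28 + 20 = 48  ->  fits the 48-byte skb->cb[] exactly
 *     32 + 20 = 52  ->  overflows it, hence the BUILD_BUG_ON above
 */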


Florian Westphal Feb. 28, 2015, 8:31 p.m. UTC | #2
Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2015-02-27 at 19:11 -0800, Tom Herbert wrote:
> 
> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> > index c605d30..d41a034 100644
> > --- a/include/net/sch_generic.h
> > +++ b/include/net/sch_generic.h
> > @@ -252,7 +252,7 @@ struct qdisc_skb_cb {
> >  	unsigned int		pkt_len;
> >  	u16			slave_dev_queue_mapping;
> >  	u16			_pad;
> > -#define QDISC_CB_PRIV_LEN 20
> > +#define QDISC_CB_PRIV_LEN 24
> >  	unsigned char		data[QDISC_CB_PRIV_LEN];
> >  };
> >  
> 
> This change breaks kernel build : We already are at the cb[] limit.
> 
> Please check commit 257117862634d89de33fec74858b1a0ba5ab444b
> ("net: sched: shrink struct qdisc_skb_cb to 28 bytes")

I've been toying around with reducing skb->cb[] to 44 bytes.
Seems Tom could integrate the following patch from my test branch:

http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670

It makes sfq use a smaller flow key state.
Dave Taht Feb. 28, 2015, 8:46 p.m. UTC | #3
On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> On Fri, 2015-02-27 at 19:11 -0800, Tom Herbert wrote:
>>
>> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> > index c605d30..d41a034 100644
>> > --- a/include/net/sch_generic.h
>> > +++ b/include/net/sch_generic.h
>> > @@ -252,7 +252,7 @@ struct qdisc_skb_cb {
>> >     unsigned int            pkt_len;
>> >     u16                     slave_dev_queue_mapping;
>> >     u16                     _pad;
>> > -#define QDISC_CB_PRIV_LEN 20
>> > +#define QDISC_CB_PRIV_LEN 24
>> >     unsigned char           data[QDISC_CB_PRIV_LEN];
>> >  };
>> >
>>
>> This change breaks kernel build : We already are at the cb[] limit.
>>
>> Please check commit 257117862634d89de33fec74858b1a0ba5ab444b
>> ("net: sched: shrink struct qdisc_skb_cb to 28 bytes")
>
> I've been toying around with reducing skb->cb[] to 44 bytes,
> Seems Tom could integrate following patch from my test branch:
>
> http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
>
> It makes sfq use a smaller flow key state.

My concern with all this work is that you may not be looking at the
quality of the hash as the number of queues goes down, or at the effect
of adding all this extra material to the hash in cases where it doesn't
exist or is not very random.

The default in fq_codel is 1024 queues, and that worked pretty well in
Monte Carlo simulations, but I have always felt it could be better once
we measured more real traffic - there is not a lot of information in
the proto field in real traffic, and - although it has been improved -
the IPv6 hash was kind of weak originally and is a little odd now.

As some are attempting to deploy these hashes with 64, 32 and even 8
queues, I would hope that someone (and I can, if I get the time) would
look closely at avalanche effects down to these last few bits.

http://en.wikipedia.org/wiki/Avalanche_effect
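
A minimal userspace sketch of the kind of check I mean - feed
synthetic, low-entropy flows into a hash and look at the bucket
occupancy for small queue counts (FNV-1a stands in for the kernel's
Jenkins hash here purely so the harness is self-contained; the
methodology, not the hash, is the point):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* stand-in hash; swap in whatever hash you actually want to evaluate */
static uint32_t fnv1a(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t h = 2166136261u;

	while (len--) {
		h ^= *p++;
		h *= 16777619u;
	}
	return h;
}

struct fake_flow {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	uint8_t  proto;
};

int main(void)
{
	unsigned int nqueues = 8;		/* try 8, 32, 64, 1024 */
	unsigned int buckets[1024] = { 0 };
	struct fake_flow f;
	unsigned int sport, i;

	memset(&f, 0, sizeof(f));
	f.saddr = 0x0a000001;			/* 10.0.0.1 */
	f.daddr = 0x0a000002;			/* 10.0.0.2 */
	f.dport = 443;
	f.proto = 6;				/* TCP */

	/* low-entropy traffic: only the source port varies */
	for (sport = 32768; sport < 32768 + 4096; sport++) {
		f.sport = (uint16_t)sport;
		buckets[fnv1a(&f, sizeof(f)) % nqueues]++;
	}

	for (i = 0; i < nqueues; i++)
		printf("queue %u: %u flows\n", i, buckets[i]);
	return 0;
}
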
Tom Herbert March 1, 2015, 5:55 p.m. UTC | #4
On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> On Fri, 2015-02-27 at 19:11 -0800, Tom Herbert wrote:
>>
>> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> > index c605d30..d41a034 100644
>> > --- a/include/net/sch_generic.h
>> > +++ b/include/net/sch_generic.h
>> > @@ -252,7 +252,7 @@ struct qdisc_skb_cb {
>> >     unsigned int            pkt_len;
>> >     u16                     slave_dev_queue_mapping;
>> >     u16                     _pad;
>> > -#define QDISC_CB_PRIV_LEN 20
>> > +#define QDISC_CB_PRIV_LEN 24
>> >     unsigned char           data[QDISC_CB_PRIV_LEN];
>> >  };
>> >
>>
>> This change breaks kernel build : We already are at the cb[] limit.
>>
>> Please check commit 257117862634d89de33fec74858b1a0ba5ab444b
>> ("net: sched: shrink struct qdisc_skb_cb to 28 bytes")
>
> I've been toying around with reducing skb->cb[] to 44 bytes,
> Seems Tom could integrate following patch from my test branch:
>
> http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
>
> It makes sfq use a smaller flow key state.

Alternatively, I think we might be able to eliminate the use of
flow_keys and flow_dissect from the qdisc code altogether. It looks
like these are only being used to determine a hash over the addresses,
ports, and protocol, so I am thinking that we can just call
skb_get_hash for that. Will try to post some patches soon.
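
Roughly what I have in mind (just a sketch of the idea, not the actual
patches; it assumes skb_get_hash() and reciprocal_scale() as they exist
in net-next today):

#include <linux/kernel.h>
#include <linux/skbuff.h>

/* sketch: classify a packet into one of flows_cnt queues using the
 * generic skb hash instead of a private flow dissection in the qdisc */
static unsigned int example_qdisc_classify(struct sk_buff *skb,
					   unsigned int flows_cnt)
{
	/* computes (and caches) the flow hash if the NIC did not
	 * already provide one */
	u32 hash = skb_get_hash(skb);

	/* map the 32-bit hash onto [0, flows_cnt) without a modulo */
	return reciprocal_scale(hash, flows_cnt);
}
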
Tom Herbert March 1, 2015, 6:16 p.m. UTC | #5
On Sat, Feb 28, 2015 at 12:46 PM, Dave Taht <dave.taht@gmail.com> wrote:
> On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
>> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>> On Fri, 2015-02-27 at 19:11 -0800, Tom Herbert wrote:
>>>
>>> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>>> > index c605d30..d41a034 100644
>>> > --- a/include/net/sch_generic.h
>>> > +++ b/include/net/sch_generic.h
>>> > @@ -252,7 +252,7 @@ struct qdisc_skb_cb {
>>> >     unsigned int            pkt_len;
>>> >     u16                     slave_dev_queue_mapping;
>>> >     u16                     _pad;
>>> > -#define QDISC_CB_PRIV_LEN 20
>>> > +#define QDISC_CB_PRIV_LEN 24
>>> >     unsigned char           data[QDISC_CB_PRIV_LEN];
>>> >  };
>>> >
>>>
>>> This change breaks kernel build : We already are at the cb[] limit.
>>>
>>> Please check commit 257117862634d89de33fec74858b1a0ba5ab444b
>>> ("net: sched: shrink struct qdisc_skb_cb to 28 bytes")
>>
>> I've been toying around with reducing skb->cb[] to 44 bytes,
>> Seems Tom could integrate following patch from my test branch:
>>
>> http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
>>
>> It makes sfq use a smaller flow key state.
>
> My concern with all this work is that you are possibly not looking at
> the quality of the hash
> as the number of queues goes down, or the effects of adding in all
> this extra stuff
> to the hash is, in cases where they don't exist, or are not very random.
>
> The default, in fq_codel is 1024 queues, and that worked pretty good
> in monty carlo simulations, but I have always felt it could be better
> after we measured more real traffic - there is not a lot of
> information in the proto field in real traffic, and - although it has
> been improved - the ipv6 hash was kind of weak originally and a little
> odd now.
>
> As some are attempting to deploy these hashes with 64, 32 and even 8
> queues, I would hope that someone (and I can if I get the time) would
> look closely at avalanche effects down to these last few bits.
>
> http://en.wikipedia.org/wiki/Avalanche_effect
>
We are only adding input to the hash function via XOR, not removing
any, so it seems unlikely this could result in less entropy. In the
worst case the extra input might simply have no effect. As for the
avalanche effect, that depends mostly on the hash function itself. In
the kernel we are using the Jenkins hash for such things, and there's a
nice graphical illustration of its avalanche behaviour on the Wikipedia
page:

http://en.wikipedia.org/wiki/Jenkins_hash_function


> --
> Dave Täht
> Let's make wifi fast, less jittery and reliable again!
>
> https://plus.google.com/u/0/107942175615993706558/posts/TVX3o84jjmb
Florian Westphal March 1, 2015, 6:24 p.m. UTC | #6
Tom Herbert <therbert@google.com> wrote:
> On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
> > I've been toying around with reducing skb->cb[] to 44 bytes,
> > Seems Tom could integrate following patch from my test branch:
> >
> > http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
> >
> > It makes sfq use a smaller flow key state.
> 
> Alternatively, I think we might be able to eliminate the use of
> flow_keys and flow_dissect from the qdisc code altogether. It looks
> like this is only being used to determine a hash over the addresses,
> ports, and protocol so I am thinking that we can just call
> skb_get_hash for that. Will try to post some patches soon.

The problem with this is that you'll lose the secret input to jhash
in sfq_hash().

Assuming you have packets p1 and p2 (from different flows) with
skb_get_hash(p1) == skb_get_hash(p2), those flows share the same
queue/bin forever, as hash perturbation will no longer work.

For sfq, hash collisions may exist as well, but they'll be resolved
after some time when q->perturbation (it's part of the hash input) is
reseeded.
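
Roughly (paraphrasing from memory, so treat this as a sketch rather
than the exact kernel code) sfq does something like:

#include <linux/jhash.h>
#include <net/flow_keys.h>

/* the dissected keys are mixed with q->perturbation; when the
 * perturbation is reseeded, flows that collided under the old seed are
 * very likely to land in different bins under the new one */
static unsigned int example_sfq_hash(const struct flow_keys *keys,
				     u32 perturbation, unsigned int divisor)
{
	u32 hash = jhash_3words((__force u32)keys->dst,
				(__force u32)keys->src ^ keys->ip_proto,
				(__force u32)keys->ports,
				perturbation);

	return hash & (divisor - 1);	/* divisor is a power of two */
}
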
Tom Herbert March 1, 2015, 7:17 p.m. UTC | #7
On Sun, Mar 1, 2015 at 10:24 AM, Florian Westphal <fw@strlen.de> wrote:
> Tom Herbert <therbert@google.com> wrote:
>> On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
>> > I've been toying around with reducing skb->cb[] to 44 bytes,
>> > Seems Tom could integrate following patch from my test branch:
>> >
>> > http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
>> >
>> > It makes sfq use a smaller flow key state.
>>
>> Alternatively, I think we might be able to eliminate the use of
>> flow_keys and flow_dissect from the qdisc code altogether. It looks
>> like this is only being used to determine a hash over the addresses,
>> ports, and protocol so I am thinking that we can just call
>> skb_get_hash for that. Will try to post some patches soon.
>
> The problem with this is that you'll lose the secret input to jhash
> in sfq_hash().
>
> assuming you have packets p1 and p2 (from different flows)
> with skb_get_hash(p1) == skb_get_hash(p2) those flows share same
> queue/bin forever as the hash pertubation will no longer work.
>
We still need hash perturbation for the mapping to a small number of
queues, which can be done after retrieving skb_get_hash(), but the
probability that two different flows collide completely in
skb_get_hash() should be 1/2^32 -- so are hash collisions really a
concern here? Note that we already lose information in the IPv6 address
fold in flow_dissect and don't yet include the VLAN ID or VNID in
flow_keys, so these probably already create a greater probability of a
tuple collision for the sfq hash.
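
Something along these lines, i.e. keep a per-qdisc perturbation but
apply it on top of the already-computed skb hash (a sketch of the idea
only, not from a posted patch):

#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

/* flows that differ in skb_get_hash() are redistributed whenever the
 * perturbation is reseeded; only a full 32-bit collision keeps two
 * flows in the same bucket permanently */
static unsigned int example_perturbed_classify(struct sk_buff *skb,
					       u32 perturbation,
					       unsigned int flows_cnt)
{
	u32 hash = jhash_1word(skb_get_hash(skb), perturbation);

	return reciprocal_scale(hash, flows_cnt);
}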

> For sfq, hash collisions may exist as well but they'll be resolved
> after some time when q->perturbation (its part of hash input) is reseeded.
Florian Westphal March 1, 2015, 7:43 p.m. UTC | #8
Tom Herbert <therbert@google.com> wrote:
> We still need hash perturbation for the mapping to a small number of
> queues which can be done after retrieving skb_get_hash, but the
> probability that two different flows match perfectly in skb_get_hash()
> should be 1/2^32-- so are hash collisions really a concern here?

I'm not concerned about accidental collisions; how predictable is
skb_get_hash()? Is skb_get_hash() guaranteed to e.g. contain L4
information?

AFAIK the answer to both is "depends on the nic/driver", so I feel it's
better to use the software flow dissector.
Dave Taht March 1, 2015, 7:57 p.m. UTC | #9
On Sun, Mar 1, 2015 at 10:24 AM, Florian Westphal <fw@strlen.de> wrote:
> Tom Herbert <therbert@google.com> wrote:
>> On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
>> > I've been toying around with reducing skb->cb[] to 44 bytes,
>> > Seems Tom could integrate following patch from my test branch:
>> >
>> > http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
>> >
>> > It makes sfq use a smaller flow key state.
>>
>> Alternatively, I think we might be able to eliminate the use of
>> flow_keys and flow_dissect from the qdisc code altogether. It looks
>> like this is only being used to determine a hash over the addresses,
>> ports, and protocol so I am thinking that we can just call
>> skb_get_hash for that. Will try to post some patches soon.
>
> The problem with this is that you'll lose the secret input to jhash
> in sfq_hash().

fq_codel also relies on a secret input to the hash, but it is only
created at instantiation time. Retaining that secret input is critical
- otherwise an attacker can fairly easily derive the hash and find ways
of flooding the connection at low rates that are almost as effective as
simple attacks against pfifo_fast.

A note on sfq: it is commonly deployed with perturb 10, which acts as a
"poor man's AQM", scrambling big flows and causing TCP cwnd reductions
- or at least it used to, before Linux gained much better resilience to
reordering. Either behavior - blowing up TCP flows (then) or allowing
them to overbuffer (now) - is pretty undesirable.

I have gone into detail on everything that is wrong with common SFQ
deployments here, commenting extensively on what was once one of the
most popular shapers in the world:
http://www.bufferbloat.net/projects/cerowrt/wiki/Wondershaper_Must_Die

> assuming you have packets p1 and p2 (from different flows)
> with skb_get_hash(p1) == skb_get_hash(p2) those flows share same
> queue/bin forever as the hash pertubation will no longer work.
>
> For sfq, hash collisions may exist as well but they'll be resolved
> after some time when q->perturbation (its part of hash input) is reseeded.
Dave Taht March 1, 2015, 8:09 p.m. UTC | #10
On Sun, Mar 1, 2015 at 10:16 AM, Tom Herbert <therbert@google.com> wrote:
> On Sat, Feb 28, 2015 at 12:46 PM, Dave Taht <dave.taht@gmail.com> wrote:
>> On Sat, Feb 28, 2015 at 12:31 PM, Florian Westphal <fw@strlen.de> wrote:
>>> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>>> On Fri, 2015-02-27 at 19:11 -0800, Tom Herbert wrote:
>>>>
>>>> > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>>>> > index c605d30..d41a034 100644
>>>> > --- a/include/net/sch_generic.h
>>>> > +++ b/include/net/sch_generic.h
>>>> > @@ -252,7 +252,7 @@ struct qdisc_skb_cb {
>>>> >     unsigned int            pkt_len;
>>>> >     u16                     slave_dev_queue_mapping;
>>>> >     u16                     _pad;
>>>> > -#define QDISC_CB_PRIV_LEN 20
>>>> > +#define QDISC_CB_PRIV_LEN 24
>>>> >     unsigned char           data[QDISC_CB_PRIV_LEN];
>>>> >  };
>>>> >
>>>>
>>>> This change breaks kernel build : We already are at the cb[] limit.
>>>>
>>>> Please check commit 257117862634d89de33fec74858b1a0ba5ab444b
>>>> ("net: sched: shrink struct qdisc_skb_cb to 28 bytes")
>>>
>>> I've been toying around with reducing skb->cb[] to 44 bytes,
>>> Seems Tom could integrate following patch from my test branch:
>>>
>>> http://git.breakpoint.cc/cgit/fw/net-next.git/commit/?h=skb_cb_44_01&id=29d711e1a71244b71940c2d1e346500bef4d6670
>>>
>>> It makes sfq use a smaller flow key state.
>>
>> My concern with all this work is that you are possibly not looking at
>> the quality of the hash
>> as the number of queues goes down, or the effects of adding in all
>> this extra stuff
>> to the hash is, in cases where they don't exist, or are not very random.
>>
>> The default, in fq_codel is 1024 queues, and that worked pretty good
>> in monty carlo simulations, but I have always felt it could be better
>> after we measured more real traffic - there is not a lot of
>> information in the proto field in real traffic, and - although it has
>> been improved - the ipv6 hash was kind of weak originally and a little
>> odd now.
>>
>> As some are attempting to deploy these hashes with 64, 32 and even 8
>> queues, I would hope that someone (and I can if I get the time) would
>> look closely at avalanche effects down to these last few bits.
>>
>> http://en.wikipedia.org/wiki/Avalanche_effect
>>
> We are only increasing the input to the hash function by XOR not
> reducing, so it seems unlikely this could result in less entropy. In
> worse case extra input would might have no effect. As for the
> avalanche effect that is more dependent on the hash function itself.
> In the kernel we are using Jenkin's hash for such things, and there's
> a nice graphical representation for the avalanche effect in the
> wikipedia page:
>
> http://en.wikipedia.org/wiki/Jenkins_hash_function

I did not say you were wrong! I just said you were making me nervous. :)

Hash functions are usually evaluated by tossing random data into them
and expecting random data all the way to the least significant bit.

In the networking case there is now a significant amount of low-entropy
data tossed into the function. I would be happier, from a theoretical
perspective, if you just tossed every input (like the full IPv6
addresses) into the hash itself, with no XOR tricks and with some care
as to which low-entropy sources end up on the low-order bits.

As an example, you get 2 bits of data from the remote port truly mixed
in at 1024 queues, and yes, Jenkins should avalanche that, but I would
really prefer it be evaluated on various forms of real traffic, not
random data.

And there might be other hash functions besides Jenkins that are better
or faster now. Although I have an interest in such things, I generally
lack the time to play with the way-cool new stuff like

http://www.burtleburtle.net/bob/hash/spooky.html

and:

https://code.google.com/p/smhasher/wiki/MurmurHash

Certainly it doesn't generally matter what hash is used, so long as it
is correctly responsive to its inputs, and fast.
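
If someone wants a starting point, this is the sort of userspace test I
mean - flip one input bit at a time and check how often each of the
low-order output bits flips (FNV-1a again as a self-contained stand-in;
substitute whichever hash you actually care about):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t fnv1a(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t h = 2166136261u;

	while (len--) {
		h ^= *p++;
		h *= 16777619u;
	}
	return h;
}

int main(void)
{
	/* a 13-byte "flow tuple": two addresses, two ports, protocol */
	uint8_t base[13], flipped[13];
	unsigned int flips[10] = { 0 };	/* low 10 output bits = 1024 queues */
	unsigned int trials = 0;
	unsigned int i, bit, out;

	memset(base, 0, sizeof(base));

	/* vary only one source-port byte, i.e. low-entropy input */
	for (i = 0; i < 256; i++) {
		base[9] = (uint8_t)i;
		for (bit = 0; bit < 8 * sizeof(base); bit++) {
			uint32_t h1, h2;

			memcpy(flipped, base, sizeof(base));
			flipped[bit / 8] ^= 1u << (bit % 8);
			h1 = fnv1a(base, sizeof(base));
			h2 = fnv1a(flipped, sizeof(flipped));
			for (out = 0; out < 10; out++)
				if (((h1 ^ h2) >> out) & 1)
					flips[out]++;
			trials++;
		}
	}

	/* ideally every output bit flips about 50% of the time */
	for (out = 0; out < 10; out++)
		printf("output bit %u flips %.1f%% of the time\n",
		       out, 100.0 * flips[out] / trials);
	return 0;
}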

>
>> --
>> Dave Täht
>> Let's make wifi fast, less jittery and reliable again!
>>
>> https://plus.google.com/u/0/107942175615993706558/posts/TVX3o84jjmb
Eric Dumazet March 1, 2015, 9:27 p.m. UTC | #11
On Sun, 2015-03-01 at 09:55 -0800, Tom Herbert wrote:

> Alternatively, I think we might be able to eliminate the use of
> flow_keys and flow_dissect from the qdisc code altogether. It looks
> like this is only being used to determine a hash over the addresses,
> ports, and protocol so I am thinking that we can just call
> skb_get_hash for that. Will try to post some patches soon.

Note that SFQ only stored the keys in case of a potential rehash, and
because skb->cb[] was available at the time.

As a rehash implies _not_ using skb_get_hash(), it is simply better to
recompute the flow key from the packet in the unlikely case we do have
a rehash.

Rehashing SFQ adds reordering, so people should think twice before
using it.
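
In other words, something along these lines for the rehash path (a
sketch of the idea, not a patch I have posted):

#include <linux/jhash.h>
#include <linux/skbuff.h>
#include <net/flow_keys.h>

/* rehash path sketch: no cached keys in skb->cb[]; re-dissect the
 * queued packet and hash it with the new perturbation */
static unsigned int example_rehash_one(const struct sk_buff *skb,
				       u32 new_perturbation,
				       unsigned int divisor)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return 0;	/* not dissectable: fall back to bin 0 */

	return jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src ^ keys.ip_proto,
			    (__force u32)keys.ports,
			    new_perturbation) & (divisor - 1);
}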



Patch

diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index dc8fd81..d498ee8 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -24,6 +24,7 @@  struct flow_keys {
 	};
 	u16	thoff;
 	__be16	n_proto;
+	__be32	hash_extra;
 	u8	ip_proto;
 };
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c605d30..d41a034 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -252,7 +252,7 @@  struct qdisc_skb_cb {
 	unsigned int		pkt_len;
 	u16			slave_dev_queue_mapping;
 	u16			_pad;
-#define QDISC_CB_PRIV_LEN 20
+#define QDISC_CB_PRIV_LEN 24
 	unsigned char		data[QDISC_CB_PRIV_LEN];
 };
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index f73a248..84bb794 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -286,7 +286,7 @@  static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
 		swap(keys->port16[0], keys->port16[1]);
 	}
 
-	extra = keys->ip_proto;
+	extra = keys->ip_proto ^ keys->hash_extra;
 	hash = __flow_hash_3words((__force u32)keys->dst ^ extra,
 				  (__force u32)keys->src,
 				  (__force u32)keys->ports);
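
The new field above is consumed by __flow_hash_from_keys(); callers in
later patches of the series are expected to fill it in while dissecting
VLAN or tunnel headers. A rough illustration of what such callers could
look like (hypothetical helper names - the actual follow-up patches are
not shown on this page):

#include <linux/if_vlan.h>
#include <net/flow_keys.h>

/* hypothetical: fold a GRE keyid into the extra hash input */
static void example_note_gre_keyid(struct flow_keys *flow, __be32 keyid)
{
	flow->hash_extra ^= keyid;
}

/* hypothetical: fold the 12-bit VLAN ID into the extra hash input */
static void example_note_vlan_id(struct flow_keys *flow, __be16 tci)
{
	flow->hash_extra ^= (__force __be32)(tci & htons(VLAN_VID_MASK));
}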