Message ID | 20150309095833.GA4028@gondor.apana.org.au |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On 03/09/2015 10:58 AM, Herbert Xu wrote: > Currently hash_rnd is a parameter that users can set. However, > no existing users set this parameter. It is also something that > people are unlikely to want to set directly since it's just a > random number. I believe the original purpose was to have reproduceability, but yeah, there are no users of it currently. > In preparation for allowing the reseeding/rehashing of rhashtable, > this patch moves hash_rnd into bucket_table so that it's now an > internal state rather than a parameter. > > Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> > > diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h > index d438eeb..5ef8ea5 100644 > --- a/include/linux/rhashtable.h > +++ b/include/linux/rhashtable.h > @@ -49,12 +49,14 @@ struct rhash_head { > /** > * struct bucket_table - Table of hash buckets > * @size: Number of hash buckets > + * @hash_rnd: Random seed to fold into hash > * @locks_mask: Mask to apply before accessing locks[] > * @locks: Array of spinlocks protecting individual buckets > * @buckets: size * hash buckets > */ > struct bucket_table { > size_t size; > + u32 hash_rnd; > unsigned int locks_mask; > spinlock_t *locks; > > @@ -72,7 +74,6 @@ struct rhashtable; > * @key_len: Length of key > * @key_offset: Offset of key in struct to be hashed > * @head_offset: Offset of rhash_head in struct to be hashed > - * @hash_rnd: Seed to use while hashing > * @max_shift: Maximum number of shifts while expanding > * @min_shift: Minimum number of shifts while shrinking > * @nulls_base: Base value to generate nulls marker > @@ -85,7 +86,6 @@ struct rhashtable_params { > size_t key_len; > size_t key_offset; > size_t head_offset; > - u32 hash_rnd; > size_t max_shift; > size_t min_shift; > u32 nulls_base; > diff --git a/lib/rhashtable.c b/lib/rhashtable.c > index b5344ef..4ce267f 100644 > --- a/lib/rhashtable.c > +++ b/lib/rhashtable.c > @@ -66,25 +66,28 @@ static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) > return hash & (tbl->size - 1); > } > > -static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) > +static u32 obj_raw_hashfn(struct rhashtable *ht, const void *ptr) > { > + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); const > u32 hash; > > if (unlikely(!ht->p.key_len)) > - hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); > + hash = ht->p.obj_hashfn(ptr, tbl->hash_rnd); > else > hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, > - ht->p.hash_rnd); > + tbl->hash_rnd); > > return hash >> HASH_RESERVED_SPACE; > } > > static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) > { > - return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE; > + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); > + const > + return ht->p.hashfn(key, len, tbl->hash_rnd) >> HASH_RESERVED_SPACE; > } > > -static u32 head_hashfn(const struct rhashtable *ht, > +static u32 head_hashfn(struct rhashtable *ht, > const struct bucket_table *tbl, > const struct rhash_head *he) > { > @@ -92,7 +95,7 @@ static u32 head_hashfn(const struct rhashtable *ht, > } > > #ifdef CONFIG_PROVE_LOCKING > -static void debug_dump_buckets(const struct rhashtable *ht, > +static void debug_dump_buckets(struct rhashtable *ht, > const struct bucket_table *tbl) > { > struct rhash_head *he; > @@ -1099,14 +1102,13 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) > if (tbl == NULL) > return -ENOMEM; > > + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); > + Have you tested this patch? ;-) If I see this correctly, the first time you shrink or expand, your hash_rnd from then onwards will constantly be 0. You need to copy over the above old hash_rnd to the new one from the newly allocated tbl. So, bucket_table_alloc() needs a change as well. Of course, with rehashing, the get_random_bytes() would directly move there. > atomic_set(&ht->nelems, 0); > atomic_set(&ht->shift, ilog2(tbl->size)); > RCU_INIT_POINTER(ht->tbl, tbl); > RCU_INIT_POINTER(ht->future_tbl, tbl); > > - if (!ht->p.hash_rnd) > - get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); > - > INIT_WORK(&ht->run_work, rht_deferred_worker); > > return 0; > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Herbert Xu <herbert@gondor.apana.org.au> Date: Mon, 9 Mar 2015 20:58:33 +1100 > Currently hash_rnd is a parameter that users can set. However, > no existing users set this parameter. It is also something that > people are unlikely to want to set directly since it's just a > random number. > > In preparation for allowing the reseeding/rehashing of rhashtable, > this patch moves hash_rnd into bucket_table so that it's now an > internal state rather than a parameter. > > Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Thomas is right, you're going to have to move the hash_rnd initialization to where we allocate the bucket tables, otherwise the first shrink/expand will set it to zero. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: David Miller <davem@redhat.com> Date: Mon, 09 Mar 2015 15:51:37 -0400 (EDT) > From: Herbert Xu <herbert@gondor.apana.org.au> > Date: Mon, 9 Mar 2015 20:58:33 +1100 > >> Currently hash_rnd is a parameter that users can set. However, >> no existing users set this parameter. It is also something that >> people are unlikely to want to set directly since it's just a >> random number. >> >> In preparation for allowing the reseeding/rehashing of rhashtable, >> this patch moves hash_rnd into bucket_table so that it's now an >> internal state rather than a parameter. >> >> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> > > Thomas is right, you're going to have to move the hash_rnd initialization > to where we allocate the bucket tables, otherwise the first shrink/expand > will set it to zero. My bad, Daniel pointed this out, not Thomas. Sorry about that. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Mar 09, 2015 at 05:26:52PM +0100, Daniel Borkmann wrote: > > I believe the original purpose was to have reproduceability, but > yeah, there are no users of it currently. If anyone ever needed that we could always provide a way to set the random value, through a rehash. > Have you tested this patch? ;-) Clearly not :) > If I see this correctly, the first time you shrink or expand, your hash_rnd > from then onwards will constantly be 0. > > You need to copy over the above old hash_rnd to the new one from the newly > allocated tbl. So, bucket_table_alloc() needs a change as well. Of course, > with rehashing, the get_random_bytes() would directly move there. Yes it should copy them. I've fixed in my tree now and it actually works! Cheers,
Hi Dave: These two patches implement arbitrary rehashing in rhashtable. The idea is based on the hashtable in net/bridge/br_multicast.c plus the suggestion by Josh Triplett to avoid using two linked lists. I have tested it using netlink but obviously more testing especially using netfilter would be welcome. After this I would like to explore the idea of allowing multiple rehashes in parallel. This would allow us to not have to rehash synchronously when the table gets too big and the previous rehash hasn't finished. Cheers,
On Tue, Mar 10, 2015 at 09:26:31AM +1100, Herbert Xu wrote: > Hi Dave: > > These two patches implement arbitrary rehashing in rhashtable. The > idea is based on the hashtable in net/bridge/br_multicast.c plus > the suggestion by Josh Triplett to avoid using two linked lists. Awesome. > I have tested it using netlink but obviously more testing especially > using netfilter would be welcome. > > After this I would like to explore the idea of allowing multiple > rehashes in parallel. This would allow us to not have to rehash > synchronously when the table gets too big and the previous rehash > hasn't finished. Aieee. - Josh Triplett -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/10/15 at 09:26am, Herbert Xu wrote: > After this I would like to explore the idea of allowing multiple > rehashes in parallel. This would allow us to not have to rehash > synchronously when the table gets too big and the previous rehash > hasn't finished. Another interesting idea was to allow growing faster than 2x. It actually became easier with this generic rehashing code because the number of bucket locks can grow quicker than 2x again. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/10/15 at 09:26am, Herbert Xu wrote: > I have tested it using netlink but obviously more testing especially > using netfilter would be welcome. I ran the attached bind_netlink test written by Ying Xue and it currently crashes with: [ 108.693908] BUG: unable to handle kernel paging request at fffffffffffffe68 I'm running bind_netlink like this: while true; do ./bind_netlink 9000 324234324& ./bind_netlink 9000 888448422& ./bind_netlink 9000 324 done I tested with the synchronize_rcu() fix on top as well and that didn't help so it must be something else. #include <sys/types.h> #include <sys/socket.h> #include <stdlib.h> #include <stdio.h> #include <assert.h> #include <string.h> #include <errno.h> #include <unistd.h> #include <netinet/in.h> #include <linux/netlink.h> void diep(char *err) { perror(err); exit(1); } int main(int argc, char *argv[]) { int i; int *fds; int num_ports; struct sockaddr_nl local; srand(strtoul(argv[2], NULL, 0)); if (argc < 2) { fprintf(stderr, "Usage: %s <PORTS>\n", argv[0]); exit(1); } num_ports = atoi(argv[optind]); printf("Create %u ports\n", num_ports); fds = malloc(sizeof(int) * num_ports); for (i = 1; i <= num_ports; i++) { if (!(fds[i] = socket(PF_NETLINK, SOCK_RAW, 0))) diep("socket"); memset(&local, 0, sizeof(local)); local.nl_family = AF_NETLINK; local.nl_pid = rand(); local.nl_groups = 0; if(bind(fds[i], (struct sockaddr *)&local, sizeof(local)) != 0){ diep("socket"); } if (!(i % 1000)) printf("Created %u ports\n", i); } printf("Ports successfully created, terminating\n"); return 0; }
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d438eeb..5ef8ea5 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,12 +49,14 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets */ struct bucket_table { size_t size; + u32 hash_rnd; unsigned int locks_mask; spinlock_t *locks; @@ -72,7 +74,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking * @nulls_base: Base value to generate nulls marker @@ -85,7 +86,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - u32 hash_rnd; size_t max_shift; size_t min_shift; u32 nulls_base; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b5344ef..4ce267f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -66,25 +66,28 @@ static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) return hash & (tbl->size - 1); } -static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) +static u32 obj_raw_hashfn(struct rhashtable *ht, const void *ptr) { + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); u32 hash; if (unlikely(!ht->p.key_len)) - hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + hash = ht->p.obj_hashfn(ptr, tbl->hash_rnd); else hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, - ht->p.hash_rnd); + tbl->hash_rnd); return hash >> HASH_RESERVED_SPACE; } static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) { - return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE; + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return ht->p.hashfn(key, len, tbl->hash_rnd) >> HASH_RESERVED_SPACE; } -static u32 head_hashfn(const struct rhashtable *ht, +static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) { @@ -92,7 +95,7 @@ static u32 head_hashfn(const struct rhashtable *ht, } #ifdef CONFIG_PROVE_LOCKING -static void debug_dump_buckets(const struct rhashtable *ht, +static void debug_dump_buckets(struct rhashtable *ht, const struct bucket_table *tbl) { struct rhash_head *he; @@ -1099,14 +1102,13 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (tbl == NULL) return -ENOMEM; + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + atomic_set(&ht->nelems, 0); atomic_set(&ht->shift, ilog2(tbl->size)); RCU_INIT_POINTER(ht->tbl, tbl); RCU_INIT_POINTER(ht->future_tbl, tbl); - if (!ht->p.hash_rnd) - get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); - INIT_WORK(&ht->run_work, rht_deferred_worker); return 0;
Currently hash_rnd is a parameter that users can set. However, no existing users set this parameter. It is also something that people are unlikely to want to set directly since it's just a random number. In preparation for allowing the reseeding/rehashing of rhashtable, this patch moves hash_rnd into bucket_table so that it's now an internal state rather than a parameter. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>