Patchwork [net-next] ipv6: almost identical frag hashing funcs combined

login
register
mail settings
Submitter Ilpo Järvinen
Date Sept. 30, 2008, 10:57 p.m.
Message ID <Pine.LNX.4.64.0810010146001.27688@wrl-59.cs.helsinki.fi>
Download mbox | patch
Permalink /patch/2143/
State Accepted
Delegated to: David Miller
Headers show

Comments

Ilpo Järvinen - Sept. 30, 2008, 10:57 p.m.
$ diff-funcs ip6qhashfn reassembly.c netfilter/nf_conntrack_reasm.c
 --- reassembly.c:ip6qhashfn()
 +++ netfilter/nf_conntrack_reasm.c:ip6qhashfn()
@@ -1,5 +1,5 @@
-static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
-			       struct in6_addr *daddr)
+static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr,
+			       const struct in6_addr *daddr)
 {
 	u32 a, b, c;

@@ -9,7 +9,7 @@

 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frags.rnd;
+	c += nf_frags.rnd;
 	__jhash_mix(a, b, c);

 	a += (__force u32)saddr->s6_addr32[3];

And codiff xx.o.old xx.o.new:

net/ipv6/netfilter/nf_conntrack_reasm.c:
  ip6qhashfn         | -512
  nf_hashfn          |   +6
  nf_ct_frag6_gather |  +36
 3 functions changed, 42 bytes added, 512 bytes removed, diff: -470
net/ipv6/reassembly.c:
  ip6qhashfn    | -512
  ip6_hashfn    |   +7
  ipv6_frag_rcv |  +89
 3 functions changed, 96 bytes added, 512 bytes removed, diff: -416

net/ipv6/reassembly.c:
  inet6_hash_frag | +510
 1 function changed, 510 bytes added, diff: +510

Total: -376

Compile tested.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
---

Hmm, here I came across git email format's limitation as it's stupid enough 
to make it impossible to have --- starting lines in the message part 
(disallowing direct quoting from anything that looks like a diff), I fixed 
(and tested with git-am) this for you already by adding space before the 
must-not-be-found line prefixes :-).

 include/net/ipv6.h                      |    2 +
 net/ipv6/netfilter/nf_conntrack_reasm.c |   32 +-----------------------------
 net/ipv6/reassembly.c                   |   11 +++++----
 3 files changed, 10 insertions(+), 35 deletions(-)
Arnaldo Carvalho de Melo - Oct. 1, 2008, 1:18 a.m.
Em Wed, Oct 01, 2008 at 01:57:47AM +0300, Ilpo Järvinen escreveu:
> 
> $ diff-funcs ip6qhashfn reassembly.c netfilter/nf_conntrack_reasm.c
>  --- reassembly.c:ip6qhashfn()
>  +++ netfilter/nf_conntrack_reasm.c:ip6qhashfn()
> @@ -1,5 +1,5 @@
> -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
> -			       struct in6_addr *daddr)
> +static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr,
> +			       const struct in6_addr *daddr)
>  {
>  	u32 a, b, c;
> 
> @@ -9,7 +9,7 @@
> 
>  	a += JHASH_GOLDEN_RATIO;
>  	b += JHASH_GOLDEN_RATIO;
> -	c += ip6_frags.rnd;
> +	c += nf_frags.rnd;
>  	__jhash_mix(a, b, c);
> 
>  	a += (__force u32)saddr->s6_addr32[3];
> 
> And codiff xx.o.old xx.o.new:
> 
> net/ipv6/netfilter/nf_conntrack_reasm.c:
>   ip6qhashfn         | -512
>   nf_hashfn          |   +6
>   nf_ct_frag6_gather |  +36
>  3 functions changed, 42 bytes added, 512 bytes removed, diff: -470
> net/ipv6/reassembly.c:
>   ip6qhashfn    | -512
>   ip6_hashfn    |   +7
>   ipv6_frag_rcv |  +89
>  3 functions changed, 96 bytes added, 512 bytes removed, diff: -416
> 
> net/ipv6/reassembly.c:
>   inet6_hash_frag | +510
>  1 function changed, 510 bytes added, diff: +510
> 
> Total: -376
> 
> Compile tested.
> 
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>

Good stuff, I wonder if you can spot possible candidates by sorting by
function size... Or perhaps by function signature... perhaps a new dwarf
that looks just at the parameter types, ordering by type name, and
reducing typedefs :-)

Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller - Oct. 1, 2008, 9:54 a.m.
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Sep 2008 22:18:38 -0300

> Em Wed, Oct 01, 2008 at 01:57:47AM +0300, Ilpo Järvinen escreveu:
> > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
> 
> Good stuff, I wonder if you can spot possible candidates by sorting by
> function size... Or perhaps by function signature... perhaps a new dwarf
> that looks just at the parameter types, ordering by type name, and
> reducing typedefs :-)

So many cool tool ideas, so little time :-)

> Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Patch applied, thanks everyone.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilpo Järvinen - Oct. 1, 2008, 11:02 a.m.
On Wed, 1 Oct 2008, David Miller wrote:

> From: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date: Tue, 30 Sep 2008 22:18:38 -0300
> 
> > Em Wed, Oct 01, 2008 at 01:57:47AM +0300, Ilpo Järvinen escreveu:
> > > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
> > 
> > Good stuff, I wonder if you can spot possible candidates by sorting by
> > function size... Or perhaps by function signature... perhaps a new dwarf
> > that looks just at the parameter types, ordering by type name, and
> > reducing typedefs :-)
> 
> So many cool tool ideas, so little time :-)

...And you haven't heard even a half of them... ;-)

Don't bother with dwarf, these can (mostly) be found on source level
quite easily. I've already experimented with a real string processing method 
known as a suffix tree (not that I know much about the actual algorithms, 
perhaps I should know better as Ukkonen can even be seen around here :-)). 
Of course there might be some other kind of things dwarves with suffix 
tree could find but it seems either too big or unsuitable hammer to this 
particular task.

Analysis is run in libstree (I didn't find any other lib which would have 
been even nearly usable). Its author announces that there are some bugs 
but it is quite usable still and in these kinds of problems you don't 
necessarily look for the definitive answer somebody else might be interested 
in but just look for needles in a haystack, and there will be plenty of 
them :-), so far I've seen some garbage occasionally.

I preprocess with large line length by indent to get rid of newlines and 
varying block bracing, the small libstree front-end of mine just gets rid 
of all syntax garbage known as white-space to avoid indentation spoiling 
the party...

I've not yet figured out how to efficiently handle (filter) the results
though, so that requires quite a lot of human labor currently. Libstree
basically gives me all matches for a common sequence for given length,
and that doesn't give all that nice results if you give a length which is
smaller than the largest possible for a common block. So I can only find 
a unique result in case of the longest match, but then it might still not 
be the most obvious candidate (e.g., copy pasted include list which is 
hardly interesting, though I might one day figure out a tool to trim 
that down but that's still awaiting its eureka :-)).

Ultimately I'd want to combine sparse and libstree so that I can bind
types and variables within structurally similar entities. I haven't
read much sparse yet but I suspect I need to stuff something into it to 
distinguish #define'd content from other things to avoid generating
zillions of spurious matches we already "know of" :-). This would allow 
better handling for fuzziness in the middle, libstree provides something 
for it (I haven't even tried that though) but in general I wouldn't mind 
having something more powerful in expressing something syntax-wise (though 
doing the actual expressing would probably require inventing yal or some 
userspace c which is sort of frowned upon thing ;-)). With yal, its power 
might start to resemble coccaine (or whatever it was named), that closed 
source tool they use to find those nearly pointless cannot be null here 
removal patches. I can imagine my tool being able to achieve almost as 
pointless things as that :-).

Some examples what the trivial searcher has found already... try:

@net-next:
a4356b2920fd4861dd6c75f558749fa5c38a00e8
2cf46637b501794d7fe9e365f0a3046f5d1f5dfb

cbe2d128a01315fb4bd55b96cf8b963f5df28ea2 was found by other means though
(also this jhash_mix one was found "by other means", ie., I was evaluating   
the jhash libification things).

Then, diff -u net/ipv4/ipip.c net/ipv6/sit.c :-) (and also 
net/ipv4/ip_gre.c though that's already slightly more complicated than 
those other two are in many places, but still has a lot of code-sharing 
opportunities). Also, there are some similarities in how struct flowi is 
constructed (its appearance may easily fool one into thinking it is a cheap 
operation but the initializers don't look all that simple).

So far I've only played on tcp and net playground, I don't yet dare to try 
drivers/net as that would make me rather unhappy while going through all 
those endless file copy results :-).

I'm also thinking of some kind of auto error path combiner to convert
these automatically to goto form (though I'm not sure if that helps
binary size as gcc _might_ be wise enough here):

if (err) {
	revert...;
	return;
}
...
if (err) {
	revertmore...;
	return;   
}
...
if (err) {
	revert...;
	revertmore...;
	revertmoreandmore...;
	return;
}  
...etc.

...finding those is a super trivial operation with a preprocessed source 
and a suffix tree (I'm not any expert of it but that's quite easy to see 
already when one gets hold of the basics) :-). Much harder problem is to 
do the actual code modification automatically since our semi-random style 
constructs here and there easily fool non-trivial scripts (probably 
sparse with source file offsets might help). This is something where a 
uniform style would make things thousand times easier but that's of course 
just utopia...

Btw, does somebody know a tool which can "diff" --side-by-side like any 
number of inputs? I'd be interested...
Brian Haley - Oct. 1, 2008, 2:18 p.m.
Ilpo Järvinen wrote:
> Btw, does somebody know a tool which can "diff" --side-by-side like any 
> number of inputs? I'd be interested...

vimdiff/gvimdiff

-Brian
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 113028f..dfa7ae3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -576,6 +576,8 @@  extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
 extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 			 struct group_filter __user *optval,
 			 int __user *optlen);
+extern unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+				    const struct in6_addr *daddr, u32 rnd);
 
 #ifdef CONFIG_PROC_FS
 extern int  ac6_proc_init(struct net *net);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 52d06dd..9967ac7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -27,7 +27,6 @@ 
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/random.h>
-#include <linux/jhash.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -103,39 +102,12 @@  struct ctl_table nf_ct_ipv6_sysctl_table[] = {
 };
 #endif
 
-static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr,
-			       const struct in6_addr *daddr)
-{
-	u32 a, b, c;
-
-	a = (__force u32)saddr->s6_addr32[0];
-	b = (__force u32)saddr->s6_addr32[1];
-	c = (__force u32)saddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += nf_frags.rnd;
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)saddr->s6_addr32[3];
-	b += (__force u32)daddr->s6_addr32[0];
-	c += (__force u32)daddr->s6_addr32[1];
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)daddr->s6_addr32[2];
-	b += (__force u32)daddr->s6_addr32[3];
-	c += (__force u32)id;
-	__jhash_mix(a, b, c);
-
-	return c & (INETFRAGS_HASHSZ - 1);
-}
-
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
 	const struct nf_ct_frag6_queue *nq;
 
 	nq = container_of(q, struct nf_ct_frag6_queue, q);
-	return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
+	return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
 }
 
 static void nf_skb_free(struct sk_buff *skb)
@@ -209,7 +181,7 @@  fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	arg.dst = dst;
 
 	read_lock_bh(&nf_frags.lock);
-	hash = ip6qhashfn(id, src, dst);
+	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
 
 	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
 	local_bh_enable();
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 89184b5..2eeadfa 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -99,8 +99,8 @@  static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
  * callers should be careful not to use the hash value outside the ipfrag_lock
  * as doing so could race with ipfrag_hash_rnd being recalculated.
  */
-static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
-			       struct in6_addr *daddr)
+unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
+			     const struct in6_addr *daddr, u32 rnd)
 {
 	u32 a, b, c;
 
@@ -110,7 +110,7 @@  static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 
 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frags.rnd;
+	c += rnd;
 	__jhash_mix(a, b, c);
 
 	a += (__force u32)saddr->s6_addr32[3];
@@ -125,13 +125,14 @@  static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 
 	return c & (INETFRAGS_HASHSZ - 1);
 }
+EXPORT_SYMBOL_GPL(inet6_hash_frag);
 
 static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 {
 	struct frag_queue *fq;
 
 	fq = container_of(q, struct frag_queue, q);
-	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
+	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
 }
 
 int ip6_frag_match(struct inet_frag_queue *q, void *a)
@@ -247,7 +248,7 @@  fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	arg.dst = dst;
 
 	read_lock(&ip6_frags.lock);
-	hash = ip6qhashfn(id, src, dst);
+	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
 	if (q == NULL)