diff mbox

net: code cleanups

Message ID 1285813497-7384-1-git-send-email-xiaosuo@gmail.com
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Changli Gao Sept. 30, 2010, 2:24 a.m. UTC
Compare operations are more readable, and compilers generate the same code
for the both.

Use the macros fl4_* to shrink the length of the lines.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
 net/ipv4/af_inet.c |    7 +++----
 net/ipv4/route.c   |   27 ++++++++++++---------------
 2 files changed, 15 insertions(+), 19 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Joe Perches Sept. 30, 2010, 2:49 a.m. UTC | #1
On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote:
> Compare operations are more readable, and compilers generate the same code
> for the both.

As far as I know, not all supported versions of gcc
generate the same code.

Also, you could probably now remove the (__force u32) casts.

> Use the macros fl4_* to shrink the length of the lines.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  net/ipv4/af_inet.c |    7 +++----
>  net/ipv4/route.c   |   27 ++++++++++++---------------
>  2 files changed, 15 insertions(+), 19 deletions(-)
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index f581f77..ef26640 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
>  
>  		iph2 = ip_hdr(p);
>  
> -		if ((iph->protocol ^ iph2->protocol) |
> -		    (iph->tos ^ iph2->tos) |
> -		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
> -		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
> +		if (iph->protocol != iph2->protocol || iph->tos != iph2->tos ||
> +		    (__force u32)iph->saddr != (__force u32)iph2->saddr ||
> +		    (__force u32)iph->daddr != (__force u32)iph2->daddr) {
>  			NAPI_GRO_CB(p)->same_flow = 0;
>  			continue;
>  		}
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 98beda4..6b00fde 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net)
>  static inline bool compare_hash_inputs(const struct flowi *fl1,
>  					const struct flowi *fl2)
>  {
> -	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->iif ^ fl2->iif)) == 0);
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
>  {
> -	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->mark ^ fl2->mark) |
> -		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
> -		(fl1->oif ^ fl2->oif) |
> -		(fl1->iif ^ fl2->iif)) == 0;
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->mark == fl2->mark &&
> +	       *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos &&
> +	       fl1->oif == fl2->oif && fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
> @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
>  
>  	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
>  	     rth = rcu_dereference(rth->dst.rt_next)) {
> -		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
> -		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
> -		     (rth->fl.iif ^ iif) |
> -		     rth->fl.oif |
> -		     (rth->fl.fl4_tos ^ tos)) == 0 &&
> -		    rth->fl.mark == skb->mark &&
> +		if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr &&
> +		    (__force u32)rth->fl.fl4_src == (__force u32)saddr &&
> +		    rth->fl.iif == iif && rth->fl.oif == 0 &&
> +		    rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark &&
>  		    net_eq(dev_net(rth->dst.dev), net) &&
>  		    !rt_is_expired(rth)) {
>  			if (noref) {



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Changli Gao Sept. 30, 2010, 3:07 a.m. UTC | #2
On Thu, Sep 30, 2010 at 10:49 AM, Joe Perches <joe@perches.com> wrote:
> On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote:
>> Compare operations are more readable, and compilers generate the same code
>> for the both.
>
> As far as I know, not all supported versions of gcc
> generate the same code.

Is the former better for the compilers?

>
> Also, you could probably now remove the (__force u32) casts.
>

Maybe Eric doesn't think so.
Joe Perches Sept. 30, 2010, 3:13 a.m. UTC | #3
On Thu, 2010-09-30 at 11:07 +0800, Changli Gao wrote:
> On Thu, Sep 30, 2010 at 10:49 AM, Joe Perches <joe@perches.com> wrote:
> > On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote:
> >> Compare operations are more readable, and compilers generate the same code
> >> for the both.
> > As far as I know, not all supported versions of gcc
> > generate the same code.
> Is the former better for the compilers?

Yes.  I don't know how much it matters though.

> > Also, you could probably now remove the (__force u32) casts.
> Maybe Eric doesn't think so.

Comparisons of equal types don't need (__force u32) casts. 

They needed to be cast to u32 for the bitwise or's to avoid
compiler warnings.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Sept. 30, 2010, 5:16 a.m. UTC | #4
Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit :
> Compare operations are more readable, and compilers generate the same code
> for the both.
> 

You have a buggy compiler then.

I know this code is ugly, but please keep it as is, dont add conditional
branches on hot paths.

Thanks

> Use the macros fl4_* to shrink the length of the lines.
> 
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
>  net/ipv4/af_inet.c |    7 +++----
>  net/ipv4/route.c   |   27 ++++++++++++---------------
>  2 files changed, 15 insertions(+), 19 deletions(-)
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index f581f77..ef26640 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
>  
>  		iph2 = ip_hdr(p);
>  
> -		if ((iph->protocol ^ iph2->protocol) |
> -		    (iph->tos ^ iph2->tos) |
> -		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
> -		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
> +		if (iph->protocol != iph2->protocol || iph->tos != iph2->tos ||
> +		    (__force u32)iph->saddr != (__force u32)iph2->saddr ||
> +		    (__force u32)iph->daddr != (__force u32)iph2->daddr) {
>  			NAPI_GRO_CB(p)->same_flow = 0;
>  			continue;
>  		}
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 98beda4..6b00fde 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net)
>  static inline bool compare_hash_inputs(const struct flowi *fl1,
>  					const struct flowi *fl2)
>  {
> -	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->iif ^ fl2->iif)) == 0);
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
>  {
> -	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
> -		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
> -		(fl1->mark ^ fl2->mark) |
> -		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
> -		(fl1->oif ^ fl2->oif) |
> -		(fl1->iif ^ fl2->iif)) == 0;
> +	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
> +	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
> +	       fl1->mark == fl2->mark &&
> +	       *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos &&
> +	       fl1->oif == fl2->oif && fl1->iif == fl2->iif;
>  }
>  
>  static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
> @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
>  
>  	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
>  	     rth = rcu_dereference(rth->dst.rt_next)) {
> -		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
> -		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
> -		     (rth->fl.iif ^ iif) |
> -		     rth->fl.oif |
> -		     (rth->fl.fl4_tos ^ tos)) == 0 &&
> -		    rth->fl.mark == skb->mark &&
> +		if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr &&
> +		    (__force u32)rth->fl.fl4_src == (__force u32)saddr &&
> +		    rth->fl.iif == iif && rth->fl.oif == 0 &&
> +		    rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark &&
>  		    net_eq(dev_net(rth->dst.dev), net) &&
>  		    !rt_is_expired(rth)) {
>  			if (noref) {
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Changli Gao Sept. 30, 2010, 6:09 a.m. UTC | #5
On Thu, Sep 30, 2010 at 1:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit :
>> Compare operations are more readable, and compilers generate the same code
>> for the both.
>>
>
> You have a buggy compiler then.

gcc version 4.4.3 (Gentoo 4.4.3-r2 p1.2)

             rth = rcu_dereference(rth->dst.rt_next)) {
                if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
                     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
                     (rth->fl.iif ^ iif) |
    2f12:       44 3b 80 dc 00 00 00    cmp    0xdc(%rax),%r8d
    2f19:       0f 85 a2 00 00 00       jne    2fc1 <ip_route_input_common+0x145
>
                     rth->fl.oif |
    2f1f:       83 b8 d8 00 00 00 00    cmpl   $0x0,0xd8(%rax)
    2f26:       0f 85 95 00 00 00       jne    2fc1 <ip_route_input_common+0x145
>
        tos &= IPTOS_RT_MASK;
        hash = rt_hash(daddr, saddr, iif, rt_genid(net));

        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
             rth = rcu_dereference(rth->dst.rt_next)) {
                if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
    2f2c:       44 3b b8 e4 00 00 00    cmp    0xe4(%rax),%r15d
    2f33:       0f 85 88 00 00 00       jne    2fc1
<ip_route_input_common+0x145>
                     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
    2f39:       44 3b b0 e8 00 00 00    cmp    0xe8(%rax),%r14d
    2f40:       75 7f                   jne    2fc1
<ip_route_input_common+0x145>


>
> I know this code is ugly, but please keep it as is, dont add conditional
> branches on hot paths.
>

If the compiler doesn't generate conditional branches, we have to
touch every necessary field of all the cache entries in one hash
bucket. Is it better than condition branch? I think the compiler
developers know it better.

And the compiler reorders the conditional branches, is it expected?
Eric Dumazet Sept. 30, 2010, 6:31 a.m. UTC | #6
Le jeudi 30 septembre 2010 à 14:09 +0800, Changli Gao a écrit :
> On Thu, Sep 30, 2010 at 1:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit :
> >> Compare operations are more readable, and compilers generate the same code
> >> for the both.
> >>
> >
> > You have a buggy compiler then.
> 
> gcc version 4.4.3 (Gentoo 4.4.3-r2 p1.2)
> 
>              rth = rcu_dereference(rth->dst.rt_next)) {
>                 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
>                      ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
>                      (rth->fl.iif ^ iif) |
>     2f12:       44 3b 80 dc 00 00 00    cmp    0xdc(%rax),%r8d
>     2f19:       0f 85 a2 00 00 00       jne    2fc1 <ip_route_input_common+0x145
> >
>                      rth->fl.oif |
>     2f1f:       83 b8 d8 00 00 00 00    cmpl   $0x0,0xd8(%rax)
>     2f26:       0f 85 95 00 00 00       jne    2fc1 <ip_route_input_common+0x145
> >
>         tos &= IPTOS_RT_MASK;
>         hash = rt_hash(daddr, saddr, iif, rt_genid(net));
> 
>         for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
>              rth = rcu_dereference(rth->dst.rt_next)) {
>                 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
>     2f2c:       44 3b b8 e4 00 00 00    cmp    0xe4(%rax),%r15d
>     2f33:       0f 85 88 00 00 00       jne    2fc1
> <ip_route_input_common+0x145>
>                      ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
>     2f39:       44 3b b0 e8 00 00 00    cmp    0xe8(%rax),%r14d
>     2f40:       75 7f                   jne    2fc1
> <ip_route_input_common+0x145>
> 
> 
> >
> > I know this code is ugly, but please keep it as is, dont add conditional
> > branches on hot paths.
> >
> 
> If the compiler doesn't generate conditional branches, we have to
> touch every necessary field of all the cache entries in one hash
> bucket. Is it better than condition branch? I think the compiler
> developers know it better.

Last famous words.

Are you aware of cache lines (64 bytes at least on typical cpus), and
that all fields are already in CPU L1 cache ? I (and others) worked hard
in the past.

> 
> And the compiler reorders the conditional branches, is it expected?
> 

Your compiler added conditional branches on a code not wanting them,
only because on _your_ cpu, these conditional branches might be cheap.

Now, try to compile for an i686 target and see the difference.

If there was no difference, your compiler would be _buggy_, because not
generating optimal assembly.

Here I get :

c141dda9:       8b 55 e8                mov    -0x18(%ebp),%edx
c141ddac:       8b 81 9c 00 00 00       mov    0x9c(%ecx),%eax
c141ddb2:       33 91 a0 00 00 00       xor    0xa0(%ecx),%edx
c141ddb8:       31 f0                   xor    %esi,%eax
c141ddba:       09 d0                   or     %edx,%eax
c141ddbc:       8b 55 e0                mov    -0x20(%ebp),%edx
c141ddbf:       33 91 94 00 00 00       xor    0x94(%ecx),%edx
c141ddc5:       09 d0                   or     %edx,%eax
c141ddc7:       0f b6 55 e7             movzbl -0x19(%ebp),%edx
c141ddcb:       0b 81 90 00 00 00       or     0x90(%ecx),%eax
c141ddd1:       32 91 a4 00 00 00       xor    0xa4(%ecx),%dl
c141ddd7:       0f b6 d2                movzbl %dl,%edx
c141ddda:       09 d0                   or     %edx,%eax
c141dddc:       0f 85 9d 00 00 00       jne    c141de7f <ip_route_input_common+0x1b4>

As you can see, only one conditional branch.

Your patch is not welcomed, thanks.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f581f77..ef26640 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1338,10 +1338,9 @@  static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 		iph2 = ip_hdr(p);
 
-		if ((iph->protocol ^ iph2->protocol) |
-		    (iph->tos ^ iph2->tos) |
-		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
-		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
+		if (iph->protocol != iph2->protocol || iph->tos != iph2->tos ||
+		    (__force u32)iph->saddr != (__force u32)iph2->saddr ||
+		    (__force u32)iph->daddr != (__force u32)iph2->daddr) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98beda4..6b00fde 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -683,19 +683,18 @@  static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
 					const struct flowi *fl2)
 {
-	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
+	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
+	       fl1->iif == fl2->iif;
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst &&
+	       (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src &&
+	       fl1->mark == fl2->mark &&
+	       *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos &&
+	       fl1->oif == fl2->oif && fl1->iif == fl2->iif;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -2286,12 +2285,10 @@  int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference(rth->dst.rt_next)) {
-		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
-		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
-		     (rth->fl.iif ^ iif) |
-		     rth->fl.oif |
-		     (rth->fl.fl4_tos ^ tos)) == 0 &&
-		    rth->fl.mark == skb->mark &&
+		if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr &&
+		    (__force u32)rth->fl.fl4_src == (__force u32)saddr &&
+		    rth->fl.iif == iif && rth->fl.oif == 0 &&
+		    rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {