diff mbox series

[nf-next] netfilter: replace modulo operation with bitwise AND

Message ID 1551093361-2150-1-git-send-email-lirongqing@baidu.com
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series [nf-next] netfilter: replace modulo operation with bitwise AND | expand

Commit Message

Li RongQing Feb. 25, 2019, 11:16 a.m. UTC
CONNTRACK_LOCKS is 1024 and power of 2, so modulo operations
can be replaced with AND (CONNTRACK_LOCKS - 1)

and bitwise AND operation is quicker than module operation

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/net/netfilter/nf_conntrack_core.h |  4 +++-
 net/netfilter/nf_conntrack_core.c         | 10 +++++-----
 net/netfilter/nf_conntrack_netlink.c      |  2 +-
 net/netfilter/nf_nat_core.c               |  6 +++---
 4 files changed, 12 insertions(+), 10 deletions(-)

Comments

Florian Westphal Feb. 25, 2019, 11:26 a.m. UTC | #1
Li RongQing <lirongqing@baidu.com> wrote:
> CONNTRACK_LOCKS is 1024 and power of 2, so modulo operations
> can be replaced with AND (CONNTRACK_LOCKS - 1)
> 
> and bitwise AND operation is quicker than module operation

Uh.  What kind of compiler doesn't figure that out?!

I would prefer to keep it as-is and let compiler do the optimization.
Li RongQing Feb. 25, 2019, 11:35 a.m. UTC | #2
> -----邮件原件-----
> 发件人: Florian Westphal [mailto:fw@strlen.de]
> 发送时间: 2019年2月25日 19:27
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: netfilter-devel@vger.kernel.org
> 主题: Re: [PATCH][nf-next] netfilter: replace modulo operation with bitwise
> AND
> 
> Li RongQing <lirongqing@baidu.com> wrote:
> > CONNTRACK_LOCKS is 1024 and power of 2, so modulo operations can be
> > replaced with AND (CONNTRACK_LOCKS - 1)
> >
> > and bitwise AND operation is quicker than module operation
> 
> Uh.  What kind of compiler doesn't figure that out?!
> 
> I would prefer to keep it as-is and let compiler do the optimization.


gcc version 7.3.0 (GCC)


main()
{
    int i=1000000000;

         i= i % 1024;

        return i;
}

00000000004004a7 <main>:
  4004a7:       55                      push   %rbp
  4004a8:       48 89 e5                mov    %rsp,%rbp
  4004ab:       c7 45 fc 00 ca 9a 3b    movl   $0x3b9aca00,-0x4(%rbp)
  4004b2:       8b 45 fc                mov    -0x4(%rbp),%eax
  4004b5:       99                      cltd
  4004b6:       c1 ea 16                shr    $0x16,%edx
  4004b9:       01 d0                   add    %edx,%eax
  4004bb:       25 ff 03 00 00          and    $0x3ff,%eax
  4004c0:       29 d0                   sub    %edx,%eax
  4004c2:       89 45 fc                mov    %eax,-0x4(%rbp)
  4004c5:       8b 45 fc                mov    -0x4(%rbp),%eax
  4004c8:       5d                      pop    %rbp
  4004c9:       c3                      retq
  4004ca:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

=================================
main()
{
    int i=1000000000;

         i= i & (1024-1);

        return i;
}

00000000004004a7 <main>:
  4004a7:       55                      push   %rbp
  4004a8:       48 89 e5                mov    %rsp,%rbp
  4004ab:       c7 45 fc 00 ca 9a 3b    movl   $0x3b9aca00,-0x4(%rbp)
  4004b2:       81 65 fc ff 03 00 00    andl   $0x3ff,-0x4(%rbp)
  4004b9:       8b 45 fc                mov    -0x4(%rbp),%eax
  4004bc:       5d                      pop    %rbp
  4004bd:       c3                      retq
  4004be:       66 90                   xchg   %ax,%ax


Similar patch:

commit 1a1d74d378b13ad3f93e8975a0ade0980a49d28b
Author: Jakub Kicinski <jakub.kicinski@netronome.com>
Date:   Mon Oct 31 20:43:17 2016 +0000

    nfp: use AND instead of modulo to get ring indexes


thanks

-RongQing
Florian Westphal Feb. 25, 2019, 11:54 a.m. UTC | #3
Li,Rongqing <lirongqing@baidu.com> wrote:
> 
> 
> > -----邮件原件-----
> > 发件人: Florian Westphal [mailto:fw@strlen.de]
> > 发送时间: 2019年2月25日 19:27
> > 收件人: Li,Rongqing <lirongqing@baidu.com>
> > 抄送: netfilter-devel@vger.kernel.org
> > 主题: Re: [PATCH][nf-next] netfilter: replace modulo operation with bitwise
> > AND
> > 
> > Li RongQing <lirongqing@baidu.com> wrote:
> > > CONNTRACK_LOCKS is 1024 and power of 2, so modulo operations can be
> > > replaced with AND (CONNTRACK_LOCKS - 1)
> > >
> > > and bitwise AND operation is quicker than module operation
> > 
> > Uh.  What kind of compiler doesn't figure that out?!
> > 
> > I would prefer to keep it as-is and let compiler do the optimization.
> 
> 
> gcc version 7.3.0 (GCC)
> 
> 
> main()
> {
>     int i=1000000000;
> 
>          i= i % 1024;
> 
>         return i;
> }
> 
> 00000000004004a7 <main>:
>   4004a7:       55                      push   %rbp
>   4004a8:       48 89 e5                mov    %rsp,%rbp
>   4004ab:       c7 45 fc 00 ca 9a 3b    movl   $0x3b9aca00,-0x4(%rbp)
>   4004b2:       8b 45 fc                mov    -0x4(%rbp),%eax
>   4004b5:       99                      cltd
>   4004b6:       c1 ea 16                shr    $0x16,%edx
>   4004b9:       01 d0                   add    %edx,%eax
>   4004bb:       25 ff 03 00 00          and    $0x3ff,%eax

& 1023.

With -O2 gcc emits same code regardless of % 1024 or & 1023.

> Similar patch:
> 
> commit 1a1d74d378b13ad3f93e8975a0ade0980a49d28b
> Author: Jakub Kicinski <jakub.kicinski@netronome.com>
> Date:   Mon Oct 31 20:43:17 2016 +0000
> 
>     nfp: use AND instead of modulo to get ring indexes

But in that case the value isn't known at compile time.

gcc can't know that '= reg1 % reg2' can be rewritten as 'reg1 & (reg2 - 1)'

But it can do it if reg2 is a known constant (and a power of 2).
Li RongQing Feb. 26, 2019, 1:15 a.m. UTC | #4
> -----邮件原件-----
> 发件人: Florian Westphal [mailto:fw@strlen.de]
> 发送时间: 2019年2月25日 19:54
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: Florian Westphal <fw@strlen.de>; netfilter-devel@vger.kernel.org
> 主题: Re: 答复: [PATCH][nf-next] netfilter: replace modulo operation with
> bitwise AND
> 
> Li,Rongqing <lirongqing@baidu.com> wrote:
> >
> >
> > > -----邮件原件-----
> > > 发件人: Florian Westphal [mailto:fw@strlen.de]
> > > 发送时间: 2019年2月25日 19:27
> > > 收件人: Li,Rongqing <lirongqing@baidu.com>
> > > 抄送: netfilter-devel@vger.kernel.org
> > > 主题: Re: [PATCH][nf-next] netfilter: replace modulo operation with
> > > bitwise AND
> > >
> > > Li RongQing <lirongqing@baidu.com> wrote:
> > > > CONNTRACK_LOCKS is 1024 and power of 2, so modulo operations can
> > > > be replaced with AND (CONNTRACK_LOCKS - 1)
> > > >
> > > > and bitwise AND operation is quicker than module operation
> > >
> > > Uh.  What kind of compiler doesn't figure that out?!
> > >
> > > I would prefer to keep it as-is and let compiler do the optimization.
> >
> >
> > gcc version 7.3.0 (GCC)
> >
> >
> > main()
> > {
> >     int i=1000000000;
> >
> >          i= i % 1024;
> >
> >         return i;
> > }
> >
> > 00000000004004a7 <main>:
> >   4004a7:       55                      push   %rbp
> >   4004a8:       48 89 e5                mov    %rsp,%rbp
> >   4004ab:       c7 45 fc 00 ca 9a 3b    movl   $0x3b9aca00,-0x4(%rbp)
> >   4004b2:       8b 45 fc                mov    -0x4(%rbp),%eax
> >   4004b5:       99                      cltd
> >   4004b6:       c1 ea 16                shr    $0x16,%edx
> >   4004b9:       01 d0                   add    %edx,%eax
> >   4004bb:       25 ff 03 00 00          and    $0x3ff,%eax
> 
> & 1023.
> 
> With -O2 gcc emits same code regardless of % 1024 or & 1023.
> 
> > Similar patch:
> >
> > commit 1a1d74d378b13ad3f93e8975a0ade0980a49d28b
> > Author: Jakub Kicinski <jakub.kicinski@netronome.com>
> > Date:   Mon Oct 31 20:43:17 2016 +0000
> >
> >     nfp: use AND instead of modulo to get ring indexes
> 
> But in that case the value isn't known at compile time.
> 
> gcc can't know that '= reg1 % reg2' can be rewritten as 'reg1 & (reg2 - 1)'
> 
> But it can do it if reg2 is a known constant (and a power of 2).


Thanks, You are right

I think this patches maybe doing two thing:

1. make codes not depend on compiler optimization
2. remind to not change CONNTRACK_LOCKS to a value which is not power of 2

-RongQing
Florian Westphal Feb. 26, 2019, 6:31 a.m. UTC | #5
Li,Rongqing <lirongqing@baidu.com> wrote:
> I think this patches maybe doing two thing:
> 
> 1. make codes not depend on compiler optimization
> 2. remind to not change CONNTRACK_LOCKS to a value which is not power of 2

You could add a BUILD_BUG_ON_NOT_POWER_OF_2() check for that.
Li RongQing Feb. 26, 2019, 9:20 a.m. UTC | #6
> -----邮件原件-----
> 发件人: Florian Westphal [mailto:fw@strlen.de]
> 发送时间: 2019年2月26日 14:32
> 收件人: Li,Rongqing <lirongqing@baidu.com>
> 抄送: Florian Westphal <fw@strlen.de>; netfilter-devel@vger.kernel.org
> 主题: Re: 答复: 答复: [PATCH][nf-next] netfilter: replace modulo operation
> with bitwise AND
> 
> Li,Rongqing <lirongqing@baidu.com> wrote:
> > I think this patches maybe doing two thing:
> >
> > 1. make codes not depend on compiler optimization 2. remind to not
> > change CONNTRACK_LOCKS to a value which is not power of 2
> 
> You could add a BUILD_BUG_ON_NOT_POWER_OF_2() check for that.


OK, please drop this patch, I will send a new one based on your advice

-RongQing
diff mbox series

Patch

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index ae41e92251dd..1b75a141c63d 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -67,7 +67,9 @@  static inline int nf_conntrack_confirm(struct sk_buff *skb)
 void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_l4proto *proto);
 
-#define CONNTRACK_LOCKS 1024
+#define CONNTRACK_LOCKS_BIT 10
+#define CONNTRACK_LOCKS  (1 << CONNTRACK_LOCKS_BIT)
+#define CONNTRACK_LOCKS_MASK (CONNTRACK_LOCKS - 1)
 
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 void nf_conntrack_lock(spinlock_t *lock);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e139c256e269..a0feb530ed00 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -116,8 +116,8 @@  EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 {
-	h1 %= CONNTRACK_LOCKS;
-	h2 %= CONNTRACK_LOCKS;
+	h1 &= CONNTRACK_LOCKS_MASK;
+	h2 &= CONNTRACK_LOCKS_MASK;
 	spin_unlock(&nf_conntrack_locks[h1]);
 	if (h1 != h2)
 		spin_unlock(&nf_conntrack_locks[h2]);
@@ -127,8 +127,8 @@  static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
 				     unsigned int h2, unsigned int sequence)
 {
-	h1 %= CONNTRACK_LOCKS;
-	h2 %= CONNTRACK_LOCKS;
+	h1 &= CONNTRACK_LOCKS_MASK;
+	h2 &= CONNTRACK_LOCKS_MASK;
 	if (h1 <= h2) {
 		nf_conntrack_lock(&nf_conntrack_locks[h1]);
 		if (h1 != h2)
@@ -1971,7 +1971,7 @@  get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 	spinlock_t *lockp;
 
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
+		lockp = &nf_conntrack_locks[*bucket & CONNTRACK_LOCKS_MASK];
 		local_bh_disable();
 		nf_conntrack_lock(lockp);
 		if (*bucket < nf_conntrack_htable_size) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 349b42a65c8a..8f22743c270f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -918,7 +918,7 @@  ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 			nf_ct_put(nf_ct_evict[i]);
 		}
 
-		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
+		lockp = &nf_conntrack_locks[cb->args[0] & CONNTRACK_LOCKS_MASK];
 		nf_conntrack_lock(lockp);
 		if (cb->args[0] >= nf_conntrack_htable_size) {
 			spin_unlock(lockp);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 35e61038ae96..2b1d377bf3ab 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -588,7 +588,7 @@  nf_nat_setup_info(struct nf_conn *ct,
 
 		srchash = hash_by_src(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
+		lock = &nf_nat_locks[srchash & CONNTRACK_LOCKS_MASK];
 		spin_lock_bh(lock);
 		hlist_add_head_rcu(&ct->nat_bysource,
 				   &nf_nat_bysource[srchash]);
@@ -773,9 +773,9 @@  static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	unsigned int h;
 
 	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+	spin_lock_bh(&nf_nat_locks[h & CONNTRACK_LOCKS_MASK]);
 	hlist_del_rcu(&ct->nat_bysource);
-	spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+	spin_unlock_bh(&nf_nat_locks[h & CONNTRACK_LOCKS_MASK]);
 }
 
 static int nf_nat_proto_clean(struct nf_conn *ct, void *data)