diff mbox

[net-next,11/12] ipvs: reorder keys in connection structure

Message ID 1362559342-18784-12-git-send-email-ja@ssi.bg
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Julian Anastasov March 6, 2013, 8:42 a.m. UTC
__ip_vs_conn_in_get and ip_vs_conn_out_get are
hot places. Optimize them, so that ports are matched first.
By moving net and fwmark below, on 32-bit arch we can fit
caddr in 32-byte cache line and all addresses in 64-byte
cache line.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
 include/net/ip_vs.h             |   12 ++++++------
 net/netfilter/ipvs/ip_vs_conn.c |    8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

Comments

Hans Schillstrom March 6, 2013, 9:43 a.m. UTC | #1
Hi Julian
Great job you have done !
I'll test it immediate...


On Wed, 2013-03-06 at 10:42 +0200, Julian Anastasov wrote:
> 	__ip_vs_conn_in_get and ip_vs_conn_out_get are
> hot places. Optimize them, so that ports are matched first.
> By moving net and fwmark below, on 32-bit arch we can fit
> caddr in 32-byte cache line and all addresses in 64-byte
> cache line.

Earlier I made some rearrangements like the one you have made.
My conclusion at that time was that the best gain was to have
fwmark and net within the first 64 bytes, and move daddr to the next
cache line.

I uesd UDP at ~7Gbit/sec and 256k source address into a x86_64 machine,
and a 50/50 mix of fwmarks and port in that tests. 

I guess that you have made similar test, and even take
ip_vs_conn_out_get() into your calculations ?


Regards
Hans

> 
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  include/net/ip_vs.h             |   12 ++++++------
>  net/netfilter/ipvs/ip_vs_conn.c |    8 ++++----
>  2 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 9059360..2bc30e6 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -566,20 +566,19 @@ struct ip_vs_conn_param {
>   */
>  struct ip_vs_conn {
>  	struct hlist_node	c_list;         /* hashed list heads */
> -#ifdef CONFIG_NET_NS
> -	struct net              *net;           /* Name space */
> -#endif
>  	/* Protocol, addresses and port numbers */
> -	u16                     af;             /* address family */
>  	__be16                  cport;
> -	__be16                  vport;
>  	__be16                  dport;
> -	__u32                   fwmark;         /* Fire wall mark from skb */
> +	__be16                  vport;
> +	u16			af;		/* address family */
>  	union nf_inet_addr      caddr;          /* client address */
>  	union nf_inet_addr      vaddr;          /* virtual address */
>  	union nf_inet_addr      daddr;          /* destination address */
>  	volatile __u32          flags;          /* status flags */
>  	__u16                   protocol;       /* Which protocol (TCP/UDP) */
> +#ifdef CONFIG_NET_NS
> +	struct net              *net;           /* Name space */
> +#endif
>  
>  	/* counter and timer */
>  	atomic_t		refcnt;		/* reference count */
> @@ -593,6 +592,7 @@ struct ip_vs_conn {
>  						 * state transition triggerd
>  						 * synchronization
>  						 */
> +	__u32			fwmark;		/* Fire wall mark from skb */
>  	unsigned long		sync_endtime;	/* jiffies + sent_retries */
>  
>  	/* Control members */
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index b0cd2be..a4d8ec5 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -265,8 +265,8 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
>  	rcu_read_lock();
>  
>  	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> -		if (cp->af == p->af &&
> -		    p->cport == cp->cport && p->vport == cp->vport &&
> +		if (p->cport == cp->cport && p->vport == cp->vport &&
> +		    cp->af == p->af &&
>  		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
>  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
>  		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
> @@ -404,8 +404,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
>  	rcu_read_lock();
>  
>  	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> -		if (cp->af == p->af &&
> -		    p->vport == cp->cport && p->cport == cp->dport &&
> +		if (p->vport == cp->cport && p->cport == cp->dport &&
> +		    cp->af == p->af &&
>  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
>  		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
>  		    p->protocol == cp->protocol &&
Julian Anastasov March 6, 2013, 9:01 p.m. UTC | #2
Hello,

On Wed, 6 Mar 2013, Hans Schillstrom wrote:

> Hi Julian
> Great job you have done !
> I'll test it immediate...

	Thanks, it would be good to catch the problems
in early phase...

> On Wed, 2013-03-06 at 10:42 +0200, Julian Anastasov wrote:
> > 	__ip_vs_conn_in_get and ip_vs_conn_out_get are
> > hot places. Optimize them, so that ports are matched first.
> > By moving net and fwmark below, on 32-bit arch we can fit
> > caddr in 32-byte cache line and all addresses in 64-byte
> > cache line.
> 
> Earlier I made some rearrangements like the one you have made.
> My conclusion at that time was that the best gain was to have
> fwmark and net within the first 64 bytes, and move daddr to the next
> cache line.

	But fwmark is used only for lookups in backup
server. The net field is checked first only in
ip_vs_ct_in_get (on scheduling), it can be optimized too.
Modern CPUs have 64-byte cache line and may be the
places of these fields do not play much because checking
the two ports is enough to differentiate most of the
connections. The addresses play when ports do not
differ, i.e. mostly for persistent connections. So,
on 64-byte cache line it would be more difficult to
see any difference.

> I uesd UDP at ~7Gbit/sec and 256k source address into a x86_64 machine,
> and a 50/50 mix of fwmarks and port in that tests. 
> 
> I guess that you have made similar test, and even take
> ip_vs_conn_out_get() into your calculations ?

	No, I have only virtual boxes for tests...

> Regards
> Hans
> 
> > 
> > Signed-off-by: Julian Anastasov <ja@ssi.bg>
> > ---
> >  include/net/ip_vs.h             |   12 ++++++------
> >  net/netfilter/ipvs/ip_vs_conn.c |    8 ++++----
> >  2 files changed, 10 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> > index 9059360..2bc30e6 100644
> > --- a/include/net/ip_vs.h
> > +++ b/include/net/ip_vs.h
> > @@ -566,20 +566,19 @@ struct ip_vs_conn_param {
> >   */
> >  struct ip_vs_conn {
> >  	struct hlist_node	c_list;         /* hashed list heads */
> > -#ifdef CONFIG_NET_NS
> > -	struct net              *net;           /* Name space */
> > -#endif
> >  	/* Protocol, addresses and port numbers */
> > -	u16                     af;             /* address family */
> >  	__be16                  cport;
> > -	__be16                  vport;
> >  	__be16                  dport;
> > -	__u32                   fwmark;         /* Fire wall mark from skb */
> > +	__be16                  vport;
> > +	u16			af;		/* address family */
> >  	union nf_inet_addr      caddr;          /* client address */
> >  	union nf_inet_addr      vaddr;          /* virtual address */
> >  	union nf_inet_addr      daddr;          /* destination address */
> >  	volatile __u32          flags;          /* status flags */
> >  	__u16                   protocol;       /* Which protocol (TCP/UDP) */
> > +#ifdef CONFIG_NET_NS
> > +	struct net              *net;           /* Name space */
> > +#endif
> >  
> >  	/* counter and timer */
> >  	atomic_t		refcnt;		/* reference count */
> > @@ -593,6 +592,7 @@ struct ip_vs_conn {
> >  						 * state transition triggerd
> >  						 * synchronization
> >  						 */
> > +	__u32			fwmark;		/* Fire wall mark from skb */
> >  	unsigned long		sync_endtime;	/* jiffies + sent_retries */
> >  
> >  	/* Control members */
> > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> > index b0cd2be..a4d8ec5 100644
> > --- a/net/netfilter/ipvs/ip_vs_conn.c
> > +++ b/net/netfilter/ipvs/ip_vs_conn.c
> > @@ -265,8 +265,8 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
> >  	rcu_read_lock();
> >  
> >  	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> > -		if (cp->af == p->af &&
> > -		    p->cport == cp->cport && p->vport == cp->vport &&
> > +		if (p->cport == cp->cport && p->vport == cp->vport &&
> > +		    cp->af == p->af &&
> >  		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
> >  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
> >  		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
> > @@ -404,8 +404,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
> >  	rcu_read_lock();
> >  
> >  	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> > -		if (cp->af == p->af &&
> > -		    p->vport == cp->cport && p->cport == cp->dport &&
> > +		if (p->vport == cp->cport && p->cport == cp->dport &&
> > +		    cp->af == p->af &&
> >  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
> >  		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
> >  		    p->protocol == cp->protocol &&

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Schillstrom March 7, 2013, 7:49 a.m. UTC | #3
Hi Julian
On Wed, 2013-03-06 at 23:01 +0200, Julian Anastasov wrote:
> 	Hello,
> 
> On Wed, 6 Mar 2013, Hans Schillstrom wrote:
> 
> > Hi Julian
> > Great job you have done !
> > I'll test it immediate...
> 
> 	Thanks, it would be good to catch the problems
> in early phase...
> 
> > On Wed, 2013-03-06 at 10:42 +0200, Julian Anastasov wrote:
> > > 	__ip_vs_conn_in_get and ip_vs_conn_out_get are
> > > hot places. Optimize them, so that ports are matched first.
> > > By moving net and fwmark below, on 32-bit arch we can fit
> > > caddr in 32-byte cache line and all addresses in 64-byte
> > > cache line.
> > 
> > Earlier I made some rearrangements like the one you have made.
> > My conclusion at that time was that the best gain was to have
> > fwmark and net within the first 64 bytes, and move daddr to the next
> > cache line.
> 
> 	But fwmark is used only for lookups in backup
> server. The net field is checked first only in
> ip_vs_ct_in_get (on scheduling), it can be optimized too.
> Modern CPUs have 64-byte cache line and may be the
> places of these fields do not play much because checking
> the two ports is enough to differentiate most of the
> connections. The addresses play when ports do not
> differ, i.e. mostly for persistent connections. So,
> on 64-byte cache line it would be more difficult to
> see any difference.

I made some tests on weaker machine (i7-3930K) with moderate background
load, there is absolute no measurable difference with daddr in first
cache line or in second line.
So based on that I prefer your solution since it keeps data together.


> > I uesd UDP at ~7Gbit/sec and 256k source address into a x86_64 machine,
> > and a 50/50 mix of fwmarks and port in that tests. 
> > 
> > I guess that you have made similar test, and even take
> > ip_vs_conn_out_get() into your calculations ?
> 
> 	No, I have only virtual boxes for tests...
> 
> > Regards
> > Hans
> > 
> > > 
> > > Signed-off-by: Julian Anastasov <ja@ssi.bg>

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>

> > > ---
> > >  include/net/ip_vs.h             |   12 ++++++------
> > >  net/netfilter/ipvs/ip_vs_conn.c |    8 ++++----
> > >  2 files changed, 10 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> > > index 9059360..2bc30e6 100644
> > > --- a/include/net/ip_vs.h
> > > +++ b/include/net/ip_vs.h
> > > @@ -566,20 +566,19 @@ struct ip_vs_conn_param {
> > >   */
> > >  struct ip_vs_conn {
> > >  	struct hlist_node	c_list;         /* hashed list heads */
> > > -#ifdef CONFIG_NET_NS
> > > -	struct net              *net;           /* Name space */
> > > -#endif
> > >  	/* Protocol, addresses and port numbers */
> > > -	u16                     af;             /* address family */
> > >  	__be16                  cport;
> > > -	__be16                  vport;
> > >  	__be16                  dport;
> > > -	__u32                   fwmark;         /* Fire wall mark from skb */
> > > +	__be16                  vport;
> > > +	u16			af;		/* address family */
> > >  	union nf_inet_addr      caddr;          /* client address */
> > >  	union nf_inet_addr      vaddr;          /* virtual address */
> > >  	union nf_inet_addr      daddr;          /* destination address */
> > >  	volatile __u32          flags;          /* status flags */
> > >  	__u16                   protocol;       /* Which protocol (TCP/UDP) */
> > > +#ifdef CONFIG_NET_NS
> > > +	struct net              *net;           /* Name space */
> > > +#endif
> > >  
> > >  	/* counter and timer */
> > >  	atomic_t		refcnt;		/* reference count */
> > > @@ -593,6 +592,7 @@ struct ip_vs_conn {
> > >  						 * state transition triggerd
> > >  						 * synchronization
> > >  						 */
> > > +	__u32			fwmark;		/* Fire wall mark from skb */
> > >  	unsigned long		sync_endtime;	/* jiffies + sent_retries */
> > >  
> > >  	/* Control members */
> > > diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> > > index b0cd2be..a4d8ec5 100644
> > > --- a/net/netfilter/ipvs/ip_vs_conn.c
> > > +++ b/net/netfilter/ipvs/ip_vs_conn.c
> > > @@ -265,8 +265,8 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
> > >  	rcu_read_lock();
> > >  
> > >  	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> > > -		if (cp->af == p->af &&
> > > -		    p->cport == cp->cport && p->vport == cp->vport &&
> > > +		if (p->cport == cp->cport && p->vport == cp->vport &&
> > > +		    cp->af == p->af &&
> > >  		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
> > >  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
> > >  		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
> > > @@ -404,8 +404,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
> > >  	rcu_read_lock();
> > >  
> > >  	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
> > > -		if (cp->af == p->af &&
> > > -		    p->vport == cp->cport && p->cport == cp->dport &&
> > > +		if (p->vport == cp->cport && p->cport == cp->dport &&
> > > +		    cp->af == p->af &&
> > >  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
> > >  		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
> > >  		    p->protocol == cp->protocol &&
> 
> Regards
> 
> --
> Julian Anastasov <ja@ssi.bg>
Julian Anastasov March 7, 2013, 11:23 p.m. UTC | #4
Hello,

On Thu, 7 Mar 2013, Hans Schillstrom wrote:

> I made some tests on weaker machine (i7-3930K) with moderate background
> load, there is absolute no measurable difference with daddr in first
> cache line or in second line.
> So based on that I prefer your solution since it keeps data together.

	Thanks for the review and the tests. But I'll
slightly extend this patch in v2 with more optimizations...

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 9059360..2bc30e6 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -566,20 +566,19 @@  struct ip_vs_conn_param {
  */
 struct ip_vs_conn {
 	struct hlist_node	c_list;         /* hashed list heads */
-#ifdef CONFIG_NET_NS
-	struct net              *net;           /* Name space */
-#endif
 	/* Protocol, addresses and port numbers */
-	u16                     af;             /* address family */
 	__be16                  cport;
-	__be16                  vport;
 	__be16                  dport;
-	__u32                   fwmark;         /* Fire wall mark from skb */
+	__be16                  vport;
+	u16			af;		/* address family */
 	union nf_inet_addr      caddr;          /* client address */
 	union nf_inet_addr      vaddr;          /* virtual address */
 	union nf_inet_addr      daddr;          /* destination address */
 	volatile __u32          flags;          /* status flags */
 	__u16                   protocol;       /* Which protocol (TCP/UDP) */
+#ifdef CONFIG_NET_NS
+	struct net              *net;           /* Name space */
+#endif
 
 	/* counter and timer */
 	atomic_t		refcnt;		/* reference count */
@@ -593,6 +592,7 @@  struct ip_vs_conn {
 						 * state transition triggerd
 						 * synchronization
 						 */
+	__u32			fwmark;		/* Fire wall mark from skb */
 	unsigned long		sync_endtime;	/* jiffies + sent_retries */
 
 	/* Control members */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b0cd2be..a4d8ec5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -265,8 +265,8 @@  __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == p->af &&
-		    p->cport == cp->cport && p->vport == cp->vport &&
+		if (p->cport == cp->cport && p->vport == cp->vport &&
+		    cp->af == p->af &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
 		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
@@ -404,8 +404,8 @@  struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == p->af &&
-		    p->vport == cp->cport && p->cport == cp->dport &&
+		if (p->vport == cp->cport && p->cport == cp->dport &&
+		    cp->af == p->af &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
 		    p->protocol == cp->protocol &&