diff mbox

ipvs: Use atomic operations atomicly

Message ID 20090901015909.GA12252@verge.net.au
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Simon Horman Sept. 1, 2009, 1:59 a.m. UTC
On Mon, Aug 31, 2009 at 02:22:26PM +0200, Patrick McHardy wrote:
> Simon Horman wrote:
> > A pointed out by Shin Hong, IPVS doesn't always use atomic operations
> > in an atomic manner. While this seems unlikely to be manifest in
> > strange behaviour, it seems appropriate to clean this up.
> > 
> > Cc: 홍신 shin hong <hongshin@gmail.com>
> > Signed-off-by: Simon Horman <horms@verge.net.au>
> 
> Applied, thanks.
> 
> >  	if (af == AF_INET &&
> >  	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
> >  	    (((cp->protocol != IPPROTO_TCP ||
> >  	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
> > -	      (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
> > +	      (pkts % sysctl_ip_vs_sync_threshold[1]
> 
> It seems that proc_do_sync_threshold() should check whether this value
> is zero. The current checks also look racy since incorrect values are
> first updated, then overwritten again.

Hi,

I'm wondering if an approach along the lines of the following is valid.
The idea is that the value in the ctl_table is essentially a scratch
value that is used by the parser and then copied into ip_vs_sync_threshold
if it is valid. I'm concerned that there are atomicity issues
surrounding writing ip_vs_sync_threshold while there might be readers.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Patrick McHardy Sept. 2, 2009, 12:56 p.m. UTC | #1
Simon Horman wrote:
> On Mon, Aug 31, 2009 at 02:22:26PM +0200, Patrick McHardy wrote:
>> It seems that proc_do_sync_threshold() should check whether this value
>> is zero. The current checks also look racy since incorrect values are
>> first updated, then overwritten again.
> 
> I'm wondering if an approach along the lines of the following is valid.
> The idea is that the value in the ctl_table is essentially a scratch
> value that is used by the parser and then copied into ip_vs_sync_threshold
> if it is valid.

Even simpler would be to use a temporary buffer on the stack for copying
the values from userspace and then copy them to the final buffer after
validation.

> I'm concerned that there are atomicity issues
> surrounding writing ip_vs_sync_threshold while there might be readers.

That might be a problem if they are required to be "synchronized".

> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1362,8 +1362,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
>  	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
>  	    (((cp->protocol != IPPROTO_TCP ||
>  	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
> -	      (pkts % sysctl_ip_vs_sync_threshold[1]
> -	       == sysctl_ip_vs_sync_threshold[0])) ||
> +	      (pkts % ip_vs_sync_threshold[1] == ip_vs_sync_threshold[0])) ||
>  	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
>  	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
>  	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index fba2892..8a9ff21 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -76,6 +76,11 @@ static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
>  /* number of virtual services */
>  static int ip_vs_num_services = 0;
>  
> +/* threshold handling */
> +static int ip_vs_sync_threshold_min = 0;
> +static int ip_vs_sync_threshold_max = INT_MAX;
> +int ip_vs_sync_threshold[2] = { 3, 50 };
> +

min should be 1 I guess or you still need to manually check
that ip_vs_sync_threshold[1] != 0 to avoid a division be zero.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 98978e7..28d0c4f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -774,8 +774,8 @@  extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 extern int sysctl_ip_vs_cache_bypass;
 extern int sysctl_ip_vs_expire_nodest_conn;
 extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
 extern int sysctl_ip_vs_nat_icmp_send;
+extern int ip_vs_sync_threshold[2];
 extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];
 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b95699f..f3572b6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1362,8 +1362,7 @@  ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
 	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (pkts % sysctl_ip_vs_sync_threshold[1]
-	       == sysctl_ip_vs_sync_threshold[0])) ||
+	      (pkts % ip_vs_sync_threshold[1] == ip_vs_sync_threshold[0])) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fba2892..8a9ff21 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -76,6 +76,11 @@  static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
 /* number of virtual services */
 static int ip_vs_num_services = 0;
 
+/* threshold handling */
+static int ip_vs_sync_threshold_min = 0;
+static int ip_vs_sync_threshold_max = INT_MAX;
+int ip_vs_sync_threshold[2] = { 3, 50 };
+
 /* sysctl variables */
 static int sysctl_ip_vs_drop_entry = 0;
 static int sysctl_ip_vs_drop_packet = 0;
@@ -85,7 +90,7 @@  static int sysctl_ip_vs_am_droprate = 10;
 int sysctl_ip_vs_cache_bypass = 0;
 int sysctl_ip_vs_expire_nodest_conn = 0;
 int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
+static int sysctl_ip_vs_sync_threshold[2];
 int sysctl_ip_vs_nat_icmp_send = 0;
 
 
@@ -1521,17 +1526,12 @@  proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
 		       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
-	int val[2];
 	int rc;
 
-	/* backup the value first */
-	memcpy(val, valp, sizeof(val));
-
-	rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
-	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
-		/* Restore the correct value */
-		memcpy(valp, val, sizeof(val));
-	}
+	rc = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (write && (valp[0] < valp[1]))
+		memcpy(ip_vs_sync_threshold, valp,
+		       sizeof(ip_vs_sync_threshold));
 	return rc;
 }
 
@@ -1698,6 +1698,8 @@  static struct ctl_table vs_vars[] = {
 		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
 		.mode		= 0644,
 		.proc_handler	= proc_do_sync_threshold,
+		.extra1		= &ip_vs_sync_threshold_min,
+		.extra2		= &ip_vs_sync_threshold_max,
 	},
 	{
 		.procname	= "nat_icmp_send",
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e177f0d..d3322fb 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -438,7 +438,7 @@  static void ip_vs_process_message(const char *buffer, const size_t buflen)
 
 		if (opt)
 			memcpy(&cp->in_seq, opt, sizeof(*opt));
-		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+		atomic_set(&cp->in_pkts, ip_vs_sync_threshold[0]);
 		cp->state = state;
 		cp->old_state = cp->state;
 		/*