diff mbox

ipvs: improved SH fallback strategy

Message ID 20130925092638.GD19768@eldamar.org.uk
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Alexander Frolkin Sept. 25, 2013, 9:26 a.m. UTC
Improve the SH fallback realserver selection strategy.

With sh and sh-fallback, if a realserver is down, this attempts to
distribute the traffic that would have gone to that server evenly
among the remaining servers.
 
Signed-off-by: Alexander Frolkin <avf@eldamar.org.uk>

--

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Julian Anastasov Sept. 26, 2013, 5:30 a.m. UTC | #1
Hello,

On Wed, 25 Sep 2013, Alexander Frolkin wrote:

> Improve the SH fallback realserver selection strategy.
> 
> With sh and sh-fallback, if a realserver is down, this attempts to
> distribute the traffic that would have gone to that server evenly
> among the remaining servers.
>  
> Signed-off-by: Alexander Frolkin <avf@eldamar.org.uk>
> 
> --
> diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
> index 3588fae..3d5ab7c 100644
> --- a/net/netfilter/ipvs/ip_vs_sh.c
> +++ b/net/netfilter/ipvs/ip_vs_sh.c
> @@ -115,27 +115,47 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
>  }
>  
>  
> -/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
> +/* As ip_vs_sh_get, but with fallback if selected server is unavailable
> + *
> + * The fallback strategy loops around the table starting from a "random"
> + * point (in fact, it is chosen to be the original hash value to make the
> + * algorithm deterministic) to find a new server.
> + */
>  static inline struct ip_vs_dest *
>  ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
>  		      const union nf_inet_addr *addr, __be16 port)
>  {
> -	unsigned int offset;
> -	unsigned int hash;
> +	unsigned int offset, roffset;
> +	unsigned int hash, ihash;
>  	struct ip_vs_dest *dest;
>  
> -	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
> -		hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
> -		dest = rcu_dereference(s->buckets[hash].dest);
> -		if (!dest)
> -			break;
> -		if (is_unavailable(dest))
> -			IP_VS_DBG_BUF(6, "SH: selected unavailable server "
> -				      "%s:%d (offset %d)",
> +	/* first try the dest it's supposed to go to */
> +	ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
> +	dest = rcu_dereference(s->buckets[ihash].dest);
> +	if (!dest)
> +		return NULL;

	Can we reduce the indentation here, e.g.:

	if (!is_unavailable(dest))
		return dest;
	IP_VS_DBG_BUF(6, "SH: selected unavailable server "
	...
	for ()...
	...
	return NULL;

> +	if (is_unavailable(dest)) {
> +		IP_VS_DBG_BUF(6, "SH: selected unavailable server "
> +		      "%s:%d, reselecting",
> +		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
> +		      ntohs(dest->port));
> +		/* if the original dest is unavailable, loop around the table
> +		 * starting from ihash to find a new dest
> +		 */
> +		for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
> +			roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
> +			hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
> +			dest = rcu_dereference(s->buckets[hash].dest);

	Every result of rcu_dereference() should be checked
for NULL (there may be no dests left):
			if (!dest)
				break;

	Also, make sure the IP_VS_DBG_BUF parameters
are indented correctly.

> +			if (is_unavailable(dest))
> +				IP_VS_DBG_BUF(6, "SH: selected unavailable "
> +				      "server %s:%d (offset %d), reselecting",
>  				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
> -				      ntohs(dest->port), offset);
> -		else
> -			return dest;
> +				      ntohs(dest->port), roffset);
> +			else
> +				return dest;
> +		}
> +	} else {
> +		return dest;
>  	}
>  
>  	return NULL;

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 3588fae..3d5ab7c 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -115,27 +115,47 @@  ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
 }
 
 
-/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ *
+ * The fallback strategy loops around the table starting from a "random"
+ * point (in fact, it is chosen to be the original hash value to make the
+ * algorithm deterministic) to find a new server.
+ */
 static inline struct ip_vs_dest *
 ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
 		      const union nf_inet_addr *addr, __be16 port)
 {
-	unsigned int offset;
-	unsigned int hash;
+	unsigned int offset, roffset;
+	unsigned int hash, ihash;
 	struct ip_vs_dest *dest;
 
-	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
-		hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
-		dest = rcu_dereference(s->buckets[hash].dest);
-		if (!dest)
-			break;
-		if (is_unavailable(dest))
-			IP_VS_DBG_BUF(6, "SH: selected unavailable server "
-				      "%s:%d (offset %d)",
+	/* first try the dest it's supposed to go to */
+	ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+	dest = rcu_dereference(s->buckets[ihash].dest);
+	if (!dest)
+		return NULL;
+	if (is_unavailable(dest)) {
+		IP_VS_DBG_BUF(6, "SH: selected unavailable server "
+		      "%s:%d, reselecting",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+		      ntohs(dest->port));
+		/* if the original dest is unavailable, loop around the table
+		 * starting from ihash to find a new dest
+		 */
+		for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
+			roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
+			hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
+			dest = rcu_dereference(s->buckets[hash].dest);
+			if (is_unavailable(dest))
+				IP_VS_DBG_BUF(6, "SH: selected unavailable "
+				      "server %s:%d (offset %d), reselecting",
 				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
-				      ntohs(dest->port), offset);
-		else
-			return dest;
+				      ntohs(dest->port), roffset);
+			else
+				return dest;
+		}
+	} else {
+		return dest;
 	}
 
 	return NULL;