@@ -870,6 +870,7 @@ struct netns_ipvs {
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
+ int sysctl_sync_qlen_max;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_expire_quiescent_template;
@@ -890,6 +891,8 @@ struct netns_ipvs {
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
struct list_head sync_queue;
+ int sync_queue_len;
+ unsigned int sync_queue_sent;
spinlock_t sync_lock;
struct ip_vs_sync_buff *sync_buff;
spinlock_t sync_buff_lock;
@@ -912,6 +915,8 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_VER 1
+#define IPVS_SYNC_WAKEUP_RATE 32
+#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4)
#ifdef CONFIG_SYSCTL
@@ -930,6 +935,11 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_ver;
}
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_qlen_max;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -947,6 +957,11 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return DEFAULT_SYNC_VER;
}
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+ return IPVS_SYNC_QLEN_MAX;
+}
+
#endif
/*
@@ -1718,6 +1718,12 @@ static struct ctl_table vs_vars[] = {
.proc_handler = &proc_do_sync_mode,
},
{
+ .procname = "sync_qlen_max",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "cache_bypass",
.maxlen = sizeof(int),
.mode = 0644,
@@ -3662,6 +3668,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_snat_reroute;
ipvs->sysctl_sync_ver = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ver;
+ ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
@@ -312,6 +312,7 @@ static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
struct ip_vs_sync_buff,
list);
list_del(&sb->list);
+ ipvs->sync_queue_len--;
}
spin_unlock_bh(&ipvs->sync_lock);
@@ -358,9 +359,13 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
struct ip_vs_sync_buff *sb = ipvs->sync_buff;
spin_lock(&ipvs->sync_lock);
- if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ if (ipvs->sync_state & IP_VS_STATE_MASTER &&
+ ipvs->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
list_add_tail(&sb->list, &ipvs->sync_queue);
- else
+ ipvs->sync_queue_len++;
+ if (!((++ipvs->sync_queue_sent) & (IPVS_SYNC_WAKEUP_RATE-1)))
+ wake_up_process(ipvs->master_thread);
+ } else
ip_vs_sync_buff_release(sb);
spin_unlock(&ipvs->sync_lock);
}
@@ -405,10 +410,11 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
ipvs->sync_buff = NULL;
} else {
spin_lock_bh(&ipvs->sync_lock);
- if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ if (ipvs->sync_state & IP_VS_STATE_MASTER) {
list_add_tail(&ipvs->sync_buff->list,
&ipvs->sync_queue);
- else
+ ipvs->sync_queue_len++;
+ } else
ip_vs_sync_buff_release(ipvs->sync_buff);
spin_unlock_bh(&ipvs->sync_lock);
}
@@ -1392,18 +1398,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
return len;
}
-static void
+static int
ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
{
int msize;
+ int ret;
msize = msg->size;
/* Put size in network byte order */
msg->size = htons(msg->size);
- if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
- pr_err("ip_vs_send_async error\n");
+ ret = ip_vs_send_async(sock, (char *)msg, msize);
+ if (ret >= 0 || ret == -EAGAIN)
+ return ret;
+ pr_err("ip_vs_send_async error %d\n", ret);
+ return 0;
}
static int
@@ -1428,32 +1438,58 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
return len;
}
+/* Get next buffer to send */
+static inline struct ip_vs_sync_buff *
+next_sync_buff(struct netns_ipvs *ipvs)
+{
+ struct ip_vs_sync_buff *sb;
+
+ sb = sb_dequeue(ipvs);
+ if (sb)
+ return sb;
+ /* Do not delay entries in buffer for more than 2 seconds */
+ return get_curr_sync_buff(ipvs, 2 * HZ);
+}
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct sock *sk = tinfo->sock->sk;
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
ipvs->master_mcast_ifn, ipvs->master_syncid);
- while (!kthread_should_stop()) {
- while ((sb = sb_dequeue(ipvs))) {
- ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
- ip_vs_sync_buff_release(sb);
+ for (;;) {
+ sb = next_sync_buff(ipvs);
+ if (!sb) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
+ schedule_timeout(HZ / 5);
+ continue;
}
- /* check if entries stay in ipvs->sync_buff for 2 seconds */
- sb = get_curr_sync_buff(ipvs, 2 * HZ);
- if (sb) {
- ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
- ip_vs_sync_buff_release(sb);
+retry:
+ if (unlikely(kthread_should_stop()))
+ break;
+ if (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
+ int ret = 0;
+
+ __wait_event_interruptible(*sk_sleep(sk),
+ sock_writeable(sk) ||
+ kthread_should_stop(),
+ ret);
+ goto retry;
}
-
- schedule_timeout_interruptible(HZ);
+ ip_vs_sync_buff_release(sb);
}
+ __set_current_state(TASK_RUNNING);
+
+ if (sb)
+ ip_vs_sync_buff_release(sb);
/* clean up the sync_buff queue */
while ((sb = sb_dequeue(ipvs)))
@@ -1538,6 +1574,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
realtask = &ipvs->master_thread;
name = "ipvs_master:%d";
threadfn = sync_thread_master;
+ ipvs->sync_queue_len = 0;
+ ipvs->sync_queue_sent = 0;
sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
if (ipvs->backup_thread)