@@ -994,7 +994,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
* Since ~2.3.5 it is also exclusive sleep lock serializing
* accesses from user process context.
*/
-#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)
+#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)
+/* backlog processing, see __release_sock(sk) */
+#define sock_owned_by_backlog(sk) ((sk)->sk_lock.owned < 0)
+/* sock owned by user, but not for backlog processing */
+#define __sock_owned_by_user(sk) ((sk)->sk_lock.owned > 0)
/*
* Macro so as to not evaluate some arguments when
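With these additions sk_lock.owned becomes a tri-state value: 0 means the lock is free, a positive value means a process-context caller owns it via lock_sock(), and a negative value means release_sock() owns it while draining the backlog (see the release_sock() hunk below). A minimal userspace model of that convention, for illustration only (struct and function names are made up, this is not kernel code):

#include <assert.h>

/* Userspace model of the tri-state sk_lock.owned convention assumed above:
 *   0  -> lock not owned
 *   1  -> owned by a process-context caller (lock_sock())
 *  -1  -> owned by release_sock() while it runs __release_sock()
 */
struct lock_model { int owned; };

static int owned_by_user(const struct lock_model *l)      { return l->owned != 0; }
static int owned_by_backlog(const struct lock_model *l)   { return l->owned < 0; }
static int owned_by_user_only(const struct lock_model *l) { return l->owned > 0; }

int main(void)
{
	struct lock_model l = { .owned = 0 };

	l.owned = 1;	/* lock_sock() from process context */
	assert(owned_by_user(&l) && owned_by_user_only(&l) && !owned_by_backlog(&l));

	l.owned = -1;	/* release_sock() draining the backlog */
	assert(owned_by_user(&l) && owned_by_backlog(&l) && !owned_by_user_only(&l));

	l.owned = 0;	/* fully released */
	assert(!owned_by_user(&l) && !owned_by_backlog(&l) && !owned_by_user_only(&l));
	return 0;
}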
@@ -2015,8 +2015,10 @@ void release_sock(struct sock *sk)
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
spin_lock_bh(&sk->sk_lock.slock);
- if (sk->sk_backlog.tail)
+ if (sk->sk_backlog.tail) {
+ sk->sk_lock.owned = -1;
__release_sock(sk);
+ }
sk->sk_lock.owned = 0;
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
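The only functional change here is that the lock owner is marked negative for the duration of the backlog drain, so softirq code can tell "owned by a user process" (defer the packet to the backlog) from "owned by release_sock() itself" (the backlog is being replayed right now). For reference, this is how the function reads with the hunk applied, assuming the unchanged lines match the 2.6.3x tree the patch appears to target (kernel excerpt, not standalone code):

void release_sock(struct sock *sk)
{
	/* The sk_lock has mutex_unlock semantics: */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail) {
		/* Advertise "owned for backlog processing" while draining, so
		 * sock_owned_by_backlog(sk) is true inside __release_sock(). */
		sk->sk_lock.owned = -1;
		__release_sock(sk);
	}
	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}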
@@ -211,10 +211,22 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
- child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child)
- inet_csk_reqsk_queue_add(sk, req, child);
- else
+ bh_lock_sock_nested(sk);
+ /* TODO: move syn_recv_sock before this lock */
+ spin_lock(&icsk->icsk_accept_queue.rskq_accept_lock);
+
+ if (likely(icsk->icsk_accept_queue.rskq_active)) {
+ child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+ if (child)
+ inet_csk_reqsk_queue_do_add(sk, req, child);
+ } else {
+ child = NULL;
+ }
+
+ spin_unlock(&icsk->icsk_accept_queue.rskq_accept_lock);
+ bh_unlock_sock(sk);
+
+ if (unlikely(child == NULL))
reqsk_free(req);
return child;
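The child socket is now created and queued only while holding the listener's bh lock plus rskq_accept_lock, and only if rskq_active still says the accept queue is alive; otherwise the request is freed. (rskq_active, rskq_accept_lock and inet_csk_reqsk_queue_do_add() are presumably introduced elsewhere in this series; they do not exist in mainline.) The pattern is the usual one: the producer checks a liveness flag under the same lock that the close path uses to clear it. A small runnable userspace sketch of that pattern, with made-up names:

#include <pthread.h>
#include <stddef.h>

/* Illustrative model only: 'acceptq' stands in for icsk_accept_queue,
 * 'active' for rskq_active, 'lock' for rskq_accept_lock. */
struct acceptq {
	pthread_mutex_t lock;
	int active;		/* cleared once the listener starts closing */
	void *newest_child;	/* stand-in for the real request queue */
};

/* Producer side (the get_cookie_sock() analogue): returns the child on
 * success, NULL if the listener is already going away (caller frees req). */
static void *queue_child(struct acceptq *q, void *child)
{
	pthread_mutex_lock(&q->lock);
	if (!q->active)
		child = NULL;		/* too late: accept queue is dead */
	else
		q->newest_child = child;
	pthread_mutex_unlock(&q->lock);
	return child;
}

/* Close side: flip the flag under the same lock, so no child can be
 * queued after this returns; then the queue can be drained and freed. */
static void deactivate(struct acceptq *q)
{
	pthread_mutex_lock(&q->lock);
	q->active = 0;
	pthread_mutex_unlock(&q->lock);
}

int main(void)
{
	struct acceptq q = { PTHREAD_MUTEX_INITIALIZER, 1, NULL };
	int child = 42;

	if (queue_child(&q, &child))	/* accepted: queue still active */
		deactivate(&q);
	if (!queue_child(&q, &child))	/* rejected: listener is closing */
		;			/* real code would reqsk_free() here */
	return 0;
}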
@@ -1812,10 +1812,15 @@ void tcp_set_state(struct sock *sk, int state)
if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
+ if (oldstate == TCP_LISTEN)
+ /* We have to prevent a race with syn_recv_sock() */
+ bh_lock_sock_nested(sk);
sk->sk_prot->unhash(sk);
if (inet_csk(sk)->icsk_bind_hash &&
!(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
inet_put_port(sk);
+ if (oldstate == TCP_LISTEN)
+ bh_unlock_sock(sk);
/* fall through */
default:
if (oldstate == TCP_ESTABLISHED)
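This pairs with the bh_lock_sock_nested() taken in the cookie/conn_request paths above: a listener leaving TCP_LISTEN is unhashed and gives up its port only while holding the bh socket lock, so a softirq that has already taken that lock finishes creating its child first, and one that arrives later can no longer find the listener. For reference, the TCP_CLOSE case as it reads with the hunk applied (kernel excerpt, unchanged lines assumed to match the target tree):

	case TCP_CLOSE:
		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);

		if (oldstate == TCP_LISTEN)
			/* We have to prevent a race with syn_recv_sock() */
			bh_lock_sock_nested(sk);
		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		if (oldstate == TCP_LISTEN)
			bh_unlock_sock(sk);
		/* fall through */
	default:
		if (oldstate == TCP_ESTABLISHED)
			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);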
@@ -1338,7 +1338,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ return 0;
+
+ bh_lock_sock_nested(sk);
+
+ if (__sock_owned_by_user(sk)) {
+ /* Somewhat inefficient: this leads to a double syn_table lookup */
+ if (likely(!sk_add_backlog(sk, skb)))
+ skb_get(skb);
+ else
+ NET_INC_STATS_BH(dev_net(skb->dev),
+ LINUX_MIB_TCPBACKLOGDROP);
goto drop;
+ }
+
+ if (inet_csk(sk)->icsk_accept_queue.listen_opt == NULL) {
+ /* socket is closing */
+ goto drop;
+ }
/* TW buckets are converted to open requests without
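So a SYN that arrives while a process-context caller holds the listener lock is no longer handled in the softirq: the skb takes an extra reference and goes onto the socket backlog, and a listener whose listen queue is already gone simply drops it. The backlogged SYN is replayed from release_sock() via __release_sock(), which feeds it back through sk_backlog_rcv() (tcp_v4_do_rcv() for TCP), so the packet walks the listen-state receive path again; that repeat pass is the double syn_table lookup the comment above refers to. As a reminder, the replay loop looks roughly like this (abridged from net/core/sock.c, comments trimmed; this function is not modified by the patch):

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk_backlog_rcv(sk, skb);	/* tcp_v4_do_rcv() for TCP */
			cond_resched_softirq();
			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}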