diff mbox series

[v3,bpf-next,10/11] bpf: Add BPF_SOCK_OPS_STATE_CB

Message ID 20171231171030.2998769-11-brakmo@fb.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: More sock_ops callbacks | expand

Commit Message

Lawrence Brakmo Dec. 31, 2017, 5:10 p.m. UTC
Adds support for calling sock_ops BPF program when there is a TCP state
change. Two arguments are used; one for the old state and another for
the new state.

There is a new enum in include/uapi/linux/bpf.h that exports the TCP
states that prepends BPF_ to the current TCP state names. If it is ever
necessary to change the internal TCP state values (other than adding
more to the end), then it will become necessary to convert from the
internal TCP state value to the BPF value before calling the BPF
sock_ops function.

New op: BPF_SOCK_OPS_STATE_CB.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++++
 include/uapi/linux/tcp.h |  1 +
 net/ipv4/tcp.c           |  4 ++++
 3 files changed, 31 insertions(+)

Comments

kernel test robot Jan. 2, 2018, 4:01 a.m. UTC | #1
Hi Lawrence,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]
[also build test WARNING on next-20171222]
[cannot apply to bpf-next/master linus/master net/master v4.15-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lawrence-Brakmo/bpf-More-sock_ops-callbacks/20180102-111550
config: x86_64-randconfig-x007-201800 (attached as .config)
compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/atomic.h:5:0,
                    from include/linux/atomic.h:5,
                    from include/linux/crypto.h:20,
                    from include/crypto/hash.h:16,
                    from net//ipv4/tcp.c:250:
   net//ipv4/tcp.c: In function 'tcp_set_state':
>> net//ipv4/tcp.c:2041:34: warning: comparison between 'enum <anonymous>' and 'enum <anonymous>' [-Wenum-compare]
     BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);
                                     ^
   include/linux/compiler.h:301:19: note: in definition of macro '__compiletime_assert'
      bool __cond = !(condition);    \
                      ^~~~~~~~~
   include/linux/compiler.h:324:2: note: in expansion of macro '_compiletime_assert'
     _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
     ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:47:37: note: in expansion of macro 'compiletime_assert'
    #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                        ^~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:71:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
     BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
     ^~~~~~~~~~~~~~~~
>> net//ipv4/tcp.c:2041:2: note: in expansion of macro 'BUILD_BUG_ON'
     BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);
     ^~~~~~~~~~~~

vim +/BUILD_BUG_ON +2041 net//ipv4/tcp.c

  2036	
  2037	void tcp_set_state(struct sock *sk, int state)
  2038	{
  2039		int oldstate = sk->sk_state;
  2040	
> 2041		BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);
  2042		if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
  2043			tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
  2044	
  2045		switch (state) {
  2046		case TCP_ESTABLISHED:
  2047			if (oldstate != TCP_ESTABLISHED)
  2048				TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
  2049			break;
  2050	
  2051		case TCP_CLOSE:
  2052			if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
  2053				TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
  2054	
  2055			sk->sk_prot->unhash(sk);
  2056			if (inet_csk(sk)->icsk_bind_hash &&
  2057			    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
  2058				inet_put_port(sk);
  2059			/* fall through */
  2060		default:
  2061			if (oldstate == TCP_ESTABLISHED)
  2062				TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
  2063		}
  2064	
  2065		/* Change state AFTER socket is unhashed to avoid closed
  2066		 * socket sitting in hash tables.
  2067		 */
  2068		inet_sk_state_store(sk, state);
  2069	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Lawrence Brakmo Jan. 3, 2018, 8:21 a.m. UTC | #2
We created a new enum to allow different TCP state names between the kernel and BPF sock_ops programs. Since we want to catch changes in the original enum, we are comparing the values of the last entries of the enum types (which is triggering the warning). 

On 1/1/18, 8:02 PM, "kbuild test robot" <lkp@intel.com> wrote:

    Hi Lawrence,
    
    Thank you for the patch! Perhaps something to improve:
    
    [auto build test WARNING on net-next/master]
    [also build test WARNING on next-20171222]
    [cannot apply to bpf-next/master linus/master net/master v4.15-rc6]
    [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
    
    url:    https://github.com/0day-ci/linux/commits/Lawrence-Brakmo/bpf-More-sock_ops-callbacks/20180102-111550
    config: x86_64-randconfig-x007-201800 (attached as .config)
    compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025
    reproduce:
            # save the attached .config to linux build tree
            make ARCH=x86_64 
    
    All warnings (new ones prefixed by >>):
    
       In file included from arch/x86/include/asm/atomic.h:5:0,
                        from include/linux/atomic.h:5,
                        from include/linux/crypto.h:20,
                        from include/crypto/hash.h:16,
                        from net//ipv4/tcp.c:250:
       net//ipv4/tcp.c: In function 'tcp_set_state':
    >> net//ipv4/tcp.c:2041:34: warning: comparison between 'enum <anonymous>' and 'enum <anonymous>' [-Wenum-compare]

         BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);
                                         ^
       include/linux/compiler.h:301:19: note: in definition of macro '__compiletime_assert'
          bool __cond = !(condition);    \
                          ^~~~~~~~~
       include/linux/compiler.h:324:2: note: in expansion of macro '_compiletime_assert'
         _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
         ^~~~~~~~~~~~~~~~~~~
       include/linux/build_bug.h:47:37: note: in expansion of macro 'compiletime_assert'
        #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                            ^~~~~~~~~~~~~~~~~~
       include/linux/build_bug.h:71:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
         BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
         ^~~~~~~~~~~~~~~~
    >> net//ipv4/tcp.c:2041:2: note: in expansion of macro 'BUILD_BUG_ON'

         BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);
         ^~~~~~~~~~~~
    
    vim +/BUILD_BUG_ON +2041 net//ipv4/tcp.c
    
      2036	
      2037	void tcp_set_state(struct sock *sk, int state)
      2038	{
      2039		int oldstate = sk->sk_state;
      2040	
    > 2041		BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);

      2042		if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
      2043			tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
      2044	
      2045		switch (state) {
      2046		case TCP_ESTABLISHED:
      2047			if (oldstate != TCP_ESTABLISHED)
      2048				TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
      2049			break;
      2050	
      2051		case TCP_CLOSE:
      2052			if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
      2053				TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
      2054	
      2055			sk->sk_prot->unhash(sk);
      2056			if (inet_csk(sk)->icsk_bind_hash &&
      2057			    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
      2058				inet_put_port(sk);
      2059			/* fall through */
      2060		default:
      2061			if (oldstate == TCP_ESTABLISHED)
      2062				TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
      2063		}
      2064	
      2065		/* Change state AFTER socket is unhashed to avoid closed
      2066		 * socket sitting in hash tables.
      2067		 */
      2068		inet_sk_state_store(sk, state);
      2069	
    
    ---
    0-DAY kernel test infrastructure                Open Source Technology Center
    https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.01.org_pipermail_kbuild-2Dall&d=DwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=pq_Mqvzfy-C8ltkgyx1u_g&m=Q7DgPe3X_jy4GHXyZsGUWwNoAYwdvyKbdsSNwopo4LA&s=hDQMCMBEVY9MOxG2fIWSCTeKMAmXhkbFc6batWKJgBg&e=                   Intel Corporation
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 415b951..b653273 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1024,6 +1024,32 @@  enum {
 					 * Arg1: sequence number of 1st byte
 					 * Arg2: # segments
 					 */
+	BPF_SOCK_OPS_STATE_CB,		/* Called when TCP changes state.
+					 * Arg1: old_state
+					 * Arg2: new_state
+					 */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+	BPF_TCP_ESTABLISHED = 1,
+	BPF_TCP_SYN_SENT,
+	BPF_TCP_SYN_RECV,
+	BPF_TCP_FIN_WAIT1,
+	BPF_TCP_FIN_WAIT2,
+	BPF_TCP_TIME_WAIT,
+	BPF_TCP_CLOSE,
+	BPF_TCP_CLOSE_WAIT,
+	BPF_TCP_LAST_ACK,
+	BPF_TCP_LISTEN,
+	BPF_TCP_CLOSING,	/* Now a valid state */
+	BPF_TCP_NEW_SYN_RECV,
+
+	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
 
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index dc36d3c..211322c 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -262,6 +262,7 @@  struct tcp_md5sig {
 /* Definitions for bpf_sock_ops_flags */
 #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
 
 /* INET_DIAG_MD5SIG */
 struct tcp_diag_md5sig {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a1fe7a7..b321dfd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2044,6 +2044,10 @@  void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;
 
+	BUILD_BUG_ON(BPF_TCP_MAX_STATES != TCP_MAX_STATES);
+	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
+		tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
+
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)