diff mbox series

[bpf-next,v6,07/11] bpf: Add support for reading sk_state and more

Message ID 20180120014548.2941040-8-brakmo@fb.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: More sock_ops callbacks | expand

Commit Message

Lawrence Brakmo Jan. 20, 2018, 1:45 a.m. UTC
Add support for reading many more tcp_sock fields

  state		same as sk->sk_state
  rtt_min	same as sk->rtt_min.s[0].v (current rtt_min)
  snd_ssthresh
  rcv_nxt
  snd_nxt
  snd_una
  mss_cache
  ecn_flags
  rate_delivered
  rate_interval_us
  packets_out
  retrans_out
  total_retrans
  segs_in
  data_segs_in
  segs_out
  data_segs_out
  bytes_received (__u64)
  bytes_acked    (__u64)

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/uapi/linux/bpf.h |  19 +++++++
 net/core/filter.c        | 134 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 140 insertions(+), 13 deletions(-)

Comments

Daniel Borkmann Jan. 24, 2018, 1:05 a.m. UTC | #1
On 01/20/2018 02:45 AM, Lawrence Brakmo wrote:
> Add support for reading many more tcp_sock fields
> 
>   state,	same as sk->sk_state
>   rtt_min	same as sk->rtt_min.s[0].v (current rtt_min)
>   snd_ssthresh
>   rcv_nxt
>   snd_nxt
>   snd_una
>   mss_cache
>   ecn_flags
>   rate_delivered
>   rate_interval_us
>   packets_out
>   retrans_out
>   total_retrans
>   segs_in
>   data_segs_in
>   segs_out
>   data_segs_out
>   bytes_received (__u64)
>   bytes_acked    (__u64)
> 
> Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
> ---
>  include/uapi/linux/bpf.h |  19 +++++++
>  net/core/filter.c        | 134 ++++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 140 insertions(+), 13 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 2a8c40a..ff34f3c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -979,6 +979,25 @@ struct bpf_sock_ops {
>  	__u32 snd_cwnd;
>  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
> +	__u32 state;
> +	__u32 rtt_min;
> +	__u32 snd_ssthresh;
> +	__u32 rcv_nxt;
> +	__u32 snd_nxt;
> +	__u32 snd_una;
> +	__u32 mss_cache;
> +	__u32 ecn_flags;
> +	__u32 rate_delivered;
> +	__u32 rate_interval_us;
> +	__u32 packets_out;
> +	__u32 retrans_out;
> +	__u32 total_retrans;
> +	__u32 segs_in;
> +	__u32 data_segs_in;
> +	__u32 segs_out;
> +	__u32 data_segs_out;

Btw, this will leave a 4-byte hole here, which the user could otherwise
access from the program. Could you add the sk_txhash from the next patch
in between here instead?

> +	__u64 bytes_received;
> +	__u64 bytes_acked;
>  };
>  
>  /* List of known BPF sock_ops operators.
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c9411dc..98665ba 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3849,34 +3849,43 @@ void bpf_warn_invalid_xdp_action(u32 act)
>  }
>  EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
>  
> -static bool __is_valid_sock_ops_access(int off, int size)
> +static bool sock_ops_is_valid_access(int off, int size,
> +				     enum bpf_access_type type,
> +				     struct bpf_insn_access_aux *info)
>  {
> +	const int size_default = sizeof(__u32);
> +
>  	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
>  		return false;
> +
>  	/* The verifier guarantees that size > 0. */
>  	if (off % size != 0)
>  		return false;
> -	if (size != sizeof(__u32))
> -		return false;
> -
> -	return true;
> -}
>  
> -static bool sock_ops_is_valid_access(int off, int size,
> -				     enum bpf_access_type type,
> -				     struct bpf_insn_access_aux *info)
> -{
>  	if (type == BPF_WRITE) {
>  		switch (off) {
> -		case offsetof(struct bpf_sock_ops, op) ...
> -		     offsetof(struct bpf_sock_ops, replylong[3]):
> +		case bpf_ctx_range_till(struct bpf_sock_ops, op, replylong[3]):
> +			if (size != size_default)
> +				return false;
>  			break;
>  		default:
>  			return false;
>  		}
> +	} else {
> +		switch (off) {
> +		case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
> +					bytes_acked):
> +			if (size != sizeof(__u64))
> +				return false;
> +			break;
> +		default:
> +			if (size != size_default)
> +				return false;
> +			break;
> +		}
>  	}
>  
> -	return __is_valid_sock_ops_access(off, size);
> +	return true;
>  }
Lawrence Brakmo Jan. 24, 2018, 1:27 a.m. UTC | #2
On 1/23/18, 5:05 PM, "Daniel Borkmann" <daniel@iogearbox.net> wrote:

    On 01/20/2018 02:45 AM, Lawrence Brakmo wrote:
    > Add support for reading many more tcp_sock fields

    > 

    >   state,	same as sk->sk_state

    >   rtt_min	same as sk->rtt_min.s[0].v (current rtt_min)

    >   snd_ssthresh

    >   rcv_nxt

    >   snd_nxt

    >   snd_una

    >   mss_cache

    >   ecn_flags

    >   rate_delivered

    >   rate_interval_us

    >   packets_out

    >   retrans_out

    >   total_retrans

    >   segs_in

    >   data_segs_in

    >   segs_out

    >   data_segs_out

    >   bytes_received (__u64)

    >   bytes_acked    (__u64)

    > 

    > Signed-off-by: Lawrence Brakmo <brakmo@fb.com>

    > ---

    >  include/uapi/linux/bpf.h |  19 +++++++

    >  net/core/filter.c        | 134 ++++++++++++++++++++++++++++++++++++++++++-----

    >  2 files changed, 140 insertions(+), 13 deletions(-)

    > 

    > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

    > index 2a8c40a..ff34f3c 100644

    > --- a/include/uapi/linux/bpf.h

    > +++ b/include/uapi/linux/bpf.h

    > @@ -979,6 +979,25 @@ struct bpf_sock_ops {

    >  	__u32 snd_cwnd;

    >  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */

    >  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */

    > +	__u32 state;

    > +	__u32 rtt_min;

    > +	__u32 snd_ssthresh;

    > +	__u32 rcv_nxt;

    > +	__u32 snd_nxt;

    > +	__u32 snd_una;

    > +	__u32 mss_cache;

    > +	__u32 ecn_flags;

    > +	__u32 rate_delivered;

    > +	__u32 rate_interval_us;

    > +	__u32 packets_out;

    > +	__u32 retrans_out;

    > +	__u32 total_retrans;

    > +	__u32 segs_in;

    > +	__u32 data_segs_in;

    > +	__u32 segs_out;

    > +	__u32 data_segs_out;

    
    Btw, this will have a 4 bytes hole in here which the user can otherwise
    address out of the prog. Could you add the sk_txhash from the next patch
    in between here instead?
    
Good point. Will fix in new patch. Thanks Daniel.

    > +	__u64 bytes_received;

    > +	__u64 bytes_acked;

    >  };

    >  

    >  /* List of known BPF sock_ops operators.

    > diff --git a/net/core/filter.c b/net/core/filter.c

    > index c9411dc..98665ba 100644

    > --- a/net/core/filter.c

    > +++ b/net/core/filter.c

    > @@ -3849,34 +3849,43 @@ void bpf_warn_invalid_xdp_action(u32 act)

    >  }

    >  EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);

    >  

    > -static bool __is_valid_sock_ops_access(int off, int size)

    > +static bool sock_ops_is_valid_access(int off, int size,

    > +				     enum bpf_access_type type,

    > +				     struct bpf_insn_access_aux *info)

    >  {

    > +	const int size_default = sizeof(__u32);

    > +

    >  	if (off < 0 || off >= sizeof(struct bpf_sock_ops))

    >  		return false;

    > +

    >  	/* The verifier guarantees that size > 0. */

    >  	if (off % size != 0)

    >  		return false;

    > -	if (size != sizeof(__u32))

    > -		return false;

    > -

    > -	return true;

    > -}

    >  

    > -static bool sock_ops_is_valid_access(int off, int size,

    > -				     enum bpf_access_type type,

    > -				     struct bpf_insn_access_aux *info)

    > -{

    >  	if (type == BPF_WRITE) {

    >  		switch (off) {

    > -		case offsetof(struct bpf_sock_ops, op) ...

    > -		     offsetof(struct bpf_sock_ops, replylong[3]):

    > +		case bpf_ctx_range_till(struct bpf_sock_ops, op, replylong[3]):

    > +			if (size != size_default)

    > +				return false;

    >  			break;

    >  		default:

    >  			return false;

    >  		}

    > +	} else {

    > +		switch (off) {

    > +		case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,

    > +					bytes_acked):

    > +			if (size != sizeof(__u64))

    > +				return false;

    > +			break;

    > +		default:

    > +			if (size != size_default)

    > +				return false;

    > +			break;

    > +		}

    >  	}

    >  

    > -	return __is_valid_sock_ops_access(off, size);

    > +	return true;

    >  }
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a8c40a..ff34f3c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -979,6 +979,25 @@  struct bpf_sock_ops {
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+	__u32 state;
+	__u32 rtt_min;
+	__u32 snd_ssthresh;
+	__u32 rcv_nxt;
+	__u32 snd_nxt;
+	__u32 snd_una;
+	__u32 mss_cache;
+	__u32 ecn_flags;
+	__u32 rate_delivered;
+	__u32 rate_interval_us;
+	__u32 packets_out;
+	__u32 retrans_out;
+	__u32 total_retrans;
+	__u32 segs_in;
+	__u32 data_segs_in;
+	__u32 segs_out;
+	__u32 data_segs_out;
+	__u64 bytes_received;
+	__u64 bytes_acked;
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/net/core/filter.c b/net/core/filter.c
index c9411dc..98665ba 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3849,34 +3849,43 @@  void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
-static bool __is_valid_sock_ops_access(int off, int size)
+static bool sock_ops_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info)
 {
+	const int size_default = sizeof(__u32);
+
 	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
 		return false;
+
 	/* The verifier guarantees that size > 0. */
 	if (off % size != 0)
 		return false;
-	if (size != sizeof(__u32))
-		return false;
-
-	return true;
-}
 
-static bool sock_ops_is_valid_access(int off, int size,
-				     enum bpf_access_type type,
-				     struct bpf_insn_access_aux *info)
-{
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct bpf_sock_ops, op) ...
-		     offsetof(struct bpf_sock_ops, replylong[3]):
+		case bpf_ctx_range_till(struct bpf_sock_ops, op, replylong[3]):
+			if (size != size_default)
+				return false;
 			break;
 		default:
 			return false;
 		}
+	} else {
+		switch (off) {
+		case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
+					bytes_acked):
+			if (size != sizeof(__u64))
+				return false;
+			break;
+		default:
+			if (size != size_default)
+				return false;
+			break;
+		}
 	}
 
-	return __is_valid_sock_ops_access(off, size);
+	return true;
 }
 
 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -4493,6 +4502,32 @@  static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 					       is_fullsock));
 		break;
 
+	case offsetof(struct bpf_sock_ops, state):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_state));
+		break;
+
+	case offsetof(struct bpf_sock_ops, rtt_min):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+			     sizeof(struct minmax));
+		BUILD_BUG_ON(sizeof(struct minmax) <
+			     sizeof(struct minmax_sample));
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct tcp_sock, rtt_min) +
+				      FIELD_SIZEOF(struct minmax_sample, t));
+		break;
+
 /* Helper macro for adding read access to tcp_sock or sock fields. */
 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
 	do {								      \
@@ -4575,6 +4610,79 @@  static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
 				   struct tcp_sock);
 		break;
+
+	case offsetof(struct bpf_sock_ops, snd_ssthresh):
+		SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, rcv_nxt):
+		SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, snd_nxt):
+		SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, snd_una):
+		SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, mss_cache):
+		SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, ecn_flags):
+		SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, rate_delivered):
+		SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, rate_interval_us):
+		SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, packets_out):
+		SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, retrans_out):
+		SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, total_retrans):
+		SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, segs_in):
+		SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, data_segs_in):
+		SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, segs_out):
+		SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, data_segs_out):
+		SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, bytes_received):
+		SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, bytes_acked):
+		SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+		break;
 	}
 	return insn - insn_buf;
 }