diff mbox series

[bpf-next,v4,07/11] bpf: Add support for reading sk_state and more

Message ID 20180104235533.3672006-8-brakmo@fb.com
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series bpf: More sock_ops callbacks | expand

Commit Message

Lawrence Brakmo Jan. 4, 2018, 11:55 p.m. UTC
Add support for reading many more tcp_sock fields

  state,	same as sk->sk_state
  rtt_min	same as sk->rtt_min.s[0].v (current rtt_min)
  snd_ssthresh
  rcv_nxt
  snd_nxt
  snd_una
  mss_cache
  ecn_flags
  rate_delivered
  rate_interval_us
  packets_out
  retrans_out
  total_retrans
  segs_in
  data_segs_in
  segs_out
  data_segs_out
  bytes_received (__u64)
  bytes_acked    (__u64)

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/uapi/linux/bpf.h |  19 +++++++++
 net/core/filter.c        | 101 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 1 deletion(-)

Comments

Daniel Borkmann Jan. 6, 2018, 1:46 a.m. UTC | #1
On 01/05/2018 12:55 AM, Lawrence Brakmo wrote:
> Add support for reading many more tcp_sock fields
> 
>   state,	same as sk->sk_state
>   rtt_min	same as sk->rtt_min.s[0].v (current rtt_min)
>   snd_ssthresh
>   rcv_nxt
>   snd_nxt
>   snd_una
>   mss_cache
>   ecn_flags
>   rate_delivered
>   rate_interval_us
>   packets_out
>   retrans_out
>   total_retrans
>   segs_in
>   data_segs_in
>   segs_out
>   data_segs_out
>   bytes_received (__u64)
>   bytes_acked    (__u64)
> 
> Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
> ---
>  include/uapi/linux/bpf.h |  19 +++++++++
>  net/core/filter.c        | 101 ++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 119 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index a1316c7..a8f4cf0 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -963,6 +963,25 @@ struct bpf_sock_ops {
>  	__u32 snd_cwnd;
>  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>  	__u32 bpf_sock_ops_flags; /* flags defined in uapi/linux/tcp.h */
> +	__u32 state;
> +	__u32 rtt_min;
> +	__u32 snd_ssthresh;
> +	__u32 rcv_nxt;
> +	__u32 snd_nxt;
> +	__u32 snd_una;
> +	__u32 mss_cache;
> +	__u32 ecn_flags;
> +	__u32 rate_delivered;
> +	__u32 rate_interval_us;
> +	__u32 packets_out;
> +	__u32 retrans_out;
> +	__u32 total_retrans;
> +	__u32 segs_in;
> +	__u32 data_segs_in;
> +	__u32 segs_out;
> +	__u32 data_segs_out;
> +	__u64 bytes_received;
> +	__u64 bytes_acked;
>  };
>  
>  /* List of known BPF sock_ops operators.
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 76fd6e9..d4c5c1a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3829,7 +3829,7 @@ static bool __is_valid_sock_ops_access(int off, int size)
>  	/* The verifier guarantees that size > 0. */
>  	if (off % size != 0)
>  		return false;
> -	if (size != sizeof(__u32))
> +	if (size != sizeof(__u32) && size != sizeof(__u64))
>  		return false;

Doesn't this have the side-effect that this would kind of let is_valid_access()
callback allow not for narrower but wider access, e.g. on the u32 members when
doing BPF_DW. It would still get ignored later on in the ctx rewrite, though, but
it also means that we could try with 2nd part of u64 member offsets with BPF_W
which would then return a 'verifier is misconfigured' error. I think it would
be better to enforce BPF_DW access only on the u64 types and BPF_W access only
on the u32 types before it becomes uapi instead opening this up generically. E.g.
we do similar approach in pe_prog_is_valid_access() for sample_period (taking the
the narrow access bits aside from there).

Thanks,
Daniel
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a1316c7..a8f4cf0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -963,6 +963,25 @@  struct bpf_sock_ops {
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
 	__u32 bpf_sock_ops_flags; /* flags defined in uapi/linux/tcp.h */
+	__u32 state;
+	__u32 rtt_min;
+	__u32 snd_ssthresh;
+	__u32 rcv_nxt;
+	__u32 snd_nxt;
+	__u32 snd_una;
+	__u32 mss_cache;
+	__u32 ecn_flags;
+	__u32 rate_delivered;
+	__u32 rate_interval_us;
+	__u32 packets_out;
+	__u32 retrans_out;
+	__u32 total_retrans;
+	__u32 segs_in;
+	__u32 data_segs_in;
+	__u32 segs_out;
+	__u32 data_segs_out;
+	__u64 bytes_received;
+	__u64 bytes_acked;
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/net/core/filter.c b/net/core/filter.c
index 76fd6e9..d4c5c1a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3829,7 +3829,7 @@  static bool __is_valid_sock_ops_access(int off, int size)
 	/* The verifier guarantees that size > 0. */
 	if (off % size != 0)
 		return false;
-	if (size != sizeof(__u32))
+	if (size != sizeof(__u32) && size != sizeof(__u64))
 		return false;
 
 	return true;
@@ -4449,6 +4449,32 @@  static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 					       is_fullsock));
 		break;
 
+	case offsetof(struct bpf_sock_ops, state):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_state));
+		break;
+
+	case offsetof(struct bpf_sock_ops, rtt_min):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+			     sizeof(struct minmax));
+		BUILD_BUG_ON(sizeof(struct minmax) <
+			     sizeof(struct minmax_sample));
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct tcp_sock, rtt_min) +
+				      FIELD_SIZEOF(struct minmax_sample, t));
+		break;
+
 /* Helper macro for adding read access to tcp_sock or sock fields. */
 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
 	do {								      \
@@ -4532,6 +4558,79 @@  static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 					  bpf_sock_ops_flags,
 					  struct tcp_sock, type);
 		break;
+
+	case offsetof(struct bpf_sock_ops, snd_ssthresh):
+		SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, rcv_nxt):
+		SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, snd_nxt):
+		SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, snd_una):
+		SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, mss_cache):
+		SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, ecn_flags):
+		SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, rate_delivered):
+		SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, rate_interval_us):
+		SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, packets_out):
+		SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, retrans_out):
+		SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, total_retrans):
+		SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, segs_in):
+		SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, data_segs_in):
+		SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, segs_out):
+		SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, data_segs_out):
+		SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, bytes_received):
+		SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
+				   struct tcp_sock);
+		break;
+
+	case offsetof(struct bpf_sock_ops, bytes_acked):
+		SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+		break;
 	}
 	return insn - insn_buf;
 }