Message ID | 20180120014548.2941040-8-brakmo@fb.com |
---|---|
State | Changes Requested, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: More sock_ops callbacks | expand |
On 01/20/2018 02:45 AM, Lawrence Brakmo wrote:
> Add support for reading many more tcp_sock fields
>
>   state, same as sk->sk_state
>   rtt_min same as sk->rtt_min.s[0].v (current rtt_min)
>   snd_ssthresh
>   rcv_nxt
>   snd_nxt
>   snd_una
>   mss_cache
>   ecn_flags
>   rate_delivered
>   rate_interval_us
>   packets_out
>   retrans_out
>   total_retrans
>   segs_in
>   data_segs_in
>   segs_out
>   data_segs_out
>   bytes_received (__u64)
>   bytes_acked (__u64)
>
> Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
> ---
>  include/uapi/linux/bpf.h |  19 +++++++
>  net/core/filter.c        | 134 ++++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 140 insertions(+), 13 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 2a8c40a..ff34f3c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -979,6 +979,25 @@ struct bpf_sock_ops {
>  	__u32 snd_cwnd;
>  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
> +	__u32 state;
> +	__u32 rtt_min;
> +	__u32 snd_ssthresh;
> +	__u32 rcv_nxt;
> +	__u32 snd_nxt;
> +	__u32 snd_una;
> +	__u32 mss_cache;
> +	__u32 ecn_flags;
> +	__u32 rate_delivered;
> +	__u32 rate_interval_us;
> +	__u32 packets_out;
> +	__u32 retrans_out;
> +	__u32 total_retrans;
> +	__u32 segs_in;
> +	__u32 data_segs_in;
> +	__u32 segs_out;
> +	__u32 data_segs_out;

Btw, this will have a 4 bytes hole in here which the user can otherwise
address out of the prog. Could you add the sk_txhash from the next patch
in between here instead?

> +	__u64 bytes_received;
> +	__u64 bytes_acked;
>  };
>
>  /* List of known BPF sock_ops operators.
> diff --git a/net/core/filter.c b/net/core/filter.c > index c9411dc..98665ba 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3849,34 +3849,43 @@ void bpf_warn_invalid_xdp_action(u32 act) > } > EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); > > -static bool __is_valid_sock_ops_access(int off, int size) > +static bool sock_ops_is_valid_access(int off, int size, > + enum bpf_access_type type, > + struct bpf_insn_access_aux *info) > { > + const int size_default = sizeof(__u32); > + > if (off < 0 || off >= sizeof(struct bpf_sock_ops)) > return false; > + > /* The verifier guarantees that size > 0. */ > if (off % size != 0) > return false; > - if (size != sizeof(__u32)) > - return false; > - > - return true; > -} > > -static bool sock_ops_is_valid_access(int off, int size, > - enum bpf_access_type type, > - struct bpf_insn_access_aux *info) > -{ > if (type == BPF_WRITE) { > switch (off) { > - case offsetof(struct bpf_sock_ops, op) ... > - offsetof(struct bpf_sock_ops, replylong[3]): > + case bpf_ctx_range_till(struct bpf_sock_ops, op, replylong[3]): > + if (size != size_default) > + return false; > break; > default: > return false; > } > + } else { > + switch (off) { > + case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, > + bytes_acked): > + if (size != sizeof(__u64)) > + return false; > + break; > + default: > + if (size != size_default) > + return false; > + break; > + } > } > > - return __is_valid_sock_ops_access(off, size); > + return true; > }
On 1/23/18, 5:05 PM, "Daniel Borkmann" <daniel@iogearbox.net> wrote:

    On 01/20/2018 02:45 AM, Lawrence Brakmo wrote:
    > Add support for reading many more tcp_sock fields
    >
    >   state, same as sk->sk_state
    >   rtt_min same as sk->rtt_min.s[0].v (current rtt_min)
    >   snd_ssthresh
    >   rcv_nxt
    >   snd_nxt
    >   snd_una
    >   mss_cache
    >   ecn_flags
    >   rate_delivered
    >   rate_interval_us
    >   packets_out
    >   retrans_out
    >   total_retrans
    >   segs_in
    >   data_segs_in
    >   segs_out
    >   data_segs_out
    >   bytes_received (__u64)
    >   bytes_acked (__u64)
    >
    > Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
    > ---
    >  include/uapi/linux/bpf.h |  19 +++++++
    >  net/core/filter.c        | 134 ++++++++++++++++++++++++++++++++++++++++++-----
    >  2 files changed, 140 insertions(+), 13 deletions(-)
    >
    > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
    > index 2a8c40a..ff34f3c 100644
    > --- a/include/uapi/linux/bpf.h
    > +++ b/include/uapi/linux/bpf.h
    > @@ -979,6 +979,25 @@ struct bpf_sock_ops {
    >  	__u32 snd_cwnd;
    >  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
    >  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
    > +	__u32 state;
    > +	__u32 rtt_min;
    > +	__u32 snd_ssthresh;
    > +	__u32 rcv_nxt;
    > +	__u32 snd_nxt;
    > +	__u32 snd_una;
    > +	__u32 mss_cache;
    > +	__u32 ecn_flags;
    > +	__u32 rate_delivered;
    > +	__u32 rate_interval_us;
    > +	__u32 packets_out;
    > +	__u32 retrans_out;
    > +	__u32 total_retrans;
    > +	__u32 segs_in;
    > +	__u32 data_segs_in;
    > +	__u32 segs_out;
    > +	__u32 data_segs_out;

    Btw, this will have a 4 bytes hole in here which the user can otherwise
    address out of the prog. Could you add the sk_txhash from the next patch
    in between here instead?

Good point. Will fix in new patch. Thanks Daniel.

    > +	__u64 bytes_received;
    > +	__u64 bytes_acked;
    >  };
    >
    >  /* List of known BPF sock_ops operators.
> diff --git a/net/core/filter.c b/net/core/filter.c > index c9411dc..98665ba 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3849,34 +3849,43 @@ void bpf_warn_invalid_xdp_action(u32 act) > } > EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); > > -static bool __is_valid_sock_ops_access(int off, int size) > +static bool sock_ops_is_valid_access(int off, int size, > + enum bpf_access_type type, > + struct bpf_insn_access_aux *info) > { > + const int size_default = sizeof(__u32); > + > if (off < 0 || off >= sizeof(struct bpf_sock_ops)) > return false; > + > /* The verifier guarantees that size > 0. */ > if (off % size != 0) > return false; > - if (size != sizeof(__u32)) > - return false; > - > - return true; > -} > > -static bool sock_ops_is_valid_access(int off, int size, > - enum bpf_access_type type, > - struct bpf_insn_access_aux *info) > -{ > if (type == BPF_WRITE) { > switch (off) { > - case offsetof(struct bpf_sock_ops, op) ... > - offsetof(struct bpf_sock_ops, replylong[3]): > + case bpf_ctx_range_till(struct bpf_sock_ops, op, replylong[3]): > + if (size != size_default) > + return false; > break; > default: > return false; > } > + } else { > + switch (off) { > + case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, > + bytes_acked): > + if (size != sizeof(__u64)) > + return false; > + break; > + default: > + if (size != size_default) > + return false; > + break; > + } > } > > - return __is_valid_sock_ops_access(off, size); > + return true; > }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2a8c40a..ff34f3c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -979,6 +979,25 @@ struct bpf_sock_ops { __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u64 bytes_received; + __u64 bytes_acked; }; /* List of known BPF sock_ops operators. diff --git a/net/core/filter.c b/net/core/filter.c index c9411dc..98665ba 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3849,34 +3849,43 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -static bool __is_valid_sock_ops_access(int off, int size) +static bool sock_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + if (off < 0 || off >= sizeof(struct bpf_sock_ops)) return false; + /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - if (size != sizeof(__u32)) - return false; - - return true; -} -static bool sock_ops_is_valid_access(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ if (type == BPF_WRITE) { switch (off) { - case offsetof(struct bpf_sock_ops, op) ... 
- offsetof(struct bpf_sock_ops, replylong[3]): + case bpf_ctx_range_till(struct bpf_sock_ops, op, replylong[3]): + if (size != size_default) + return false; break; default: return false; } + } else { + switch (off) { + case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, + bytes_acked): + if (size != sizeof(__u64)) + return false; + break; + default: + if (size != size_default) + return false; + break; + } } - return __is_valid_sock_ops_access(off, size); + return true; } static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, @@ -4493,6 +4502,32 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, is_fullsock)); break; + case offsetof(struct bpf_sock_ops, state): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_state)); + break; + + case offsetof(struct bpf_sock_ops, rtt_min): + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + sizeof(struct minmax)); + BUILD_BUG_ON(sizeof(struct minmax) < + sizeof(struct minmax_sample)); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct tcp_sock, rtt_min) + + FIELD_SIZEOF(struct minmax_sample, t)); + break; + /* Helper macro for adding read access to tcp_sock or sock fields. 
*/ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ @@ -4575,6 +4610,79 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, struct tcp_sock); break; + + case offsetof(struct bpf_sock_ops, snd_ssthresh): + SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, rcv_nxt): + SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, snd_nxt): + SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, snd_una): + SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, mss_cache): + SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, ecn_flags): + SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, rate_delivered): + SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, rate_interval_us): + SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, packets_out): + SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, retrans_out): + SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, total_retrans): + SOCK_OPS_GET_FIELD(total_retrans, total_retrans, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, segs_in): + SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, data_segs_in): + SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, segs_out): + SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock); + break; + + 
case offsetof(struct bpf_sock_ops, data_segs_out): + SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, bytes_received): + SOCK_OPS_GET_FIELD(bytes_received, bytes_received, + struct tcp_sock); + break; + + case offsetof(struct bpf_sock_ops, bytes_acked): + SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock); + break; } return insn - insn_buf; }
Add support for reading many more tcp_sock fields

  state, same as sk->sk_state
  rtt_min same as sk->rtt_min.s[0].v (current rtt_min)
  snd_ssthresh
  rcv_nxt
  snd_nxt
  snd_una
  mss_cache
  ecn_flags
  rate_delivered
  rate_interval_us
  packets_out
  retrans_out
  total_retrans
  segs_in
  data_segs_in
  segs_out
  data_segs_out
  bytes_received (__u64)
  bytes_acked (__u64)

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/uapi/linux/bpf.h |  19 +++++++
 net/core/filter.c        | 134 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 140 insertions(+), 13 deletions(-)