Message ID | 20180326153657.2229959-1-tehnerd@fb.com |
---|---|
State | Accepted, archived |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next] : add sock_ops R/W access to ipv4 tos | expand |
On 03/26/2018 05:36 PM, Nikita V. Shirokov wrote: > bpf: Add sock_ops R/W access to ipv4 tos > > Sample usage for tos: > > bpf_getsockopt(skops, SOL_IP, IP_TOS, &v, sizeof(v)) > > where skops is a pointer to the ctx (struct bpf_sock_ops). > > Signed-off-by: Nikita V. Shirokov <tehnerd@fb.com> > --- > net/core/filter.c | 35 +++++++++++++++++++++++++++++++++++ > 1 file changed, 35 insertions(+) > > diff --git a/net/core/filter.c b/net/core/filter.c > index 00c711c..afd8255 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3462,6 +3462,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, > ret = -EINVAL; > } > #ifdef CONFIG_INET > + } else if (level == SOL_IP) { > + if (optlen != sizeof(int) || sk->sk_family != AF_INET) > + return -EINVAL; > + > + val = *((int *)optval); > + /* Only some options are supported */ > + switch (optname) { > + case IP_TOS: > + if (val < -1 || val > 0xff) { > + ret = -EINVAL; > + } else { > + struct inet_sock *inet = inet_sk(sk); > + > + if (val == -1) > + val = 0; > + inet->tos = val; Should this not have the exact same semantics given the helper resembles the normal setsockopt? do_ip_setsockopt() does the following when setting IP_TOS: case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; val |= inet->tos & INET_ECN_MASK; } if (inet->tos != val) { inet->tos = val; sk->sk_priority = rt_tos2priority(val); sk_dst_reset(sk); } break; E.g. why we don't need to set sk->sk_priority as well or reset the dst entry here? > + } > + break; > + default: > + ret = -EINVAL; > + } > #if IS_ENABLED(CONFIG_IPV6) > } else if (level == SOL_IPV6) { > if (optlen != sizeof(int) || sk->sk_family != AF_INET6) > @@ -3561,6 +3582,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, > } else { > goto err_clear; > } > + } else if (level == SOL_IP) { > + struct inet_sock *inet = inet_sk(sk); > + > + if (optlen != sizeof(int) || sk->sk_family != AF_INET) > + goto err_clear; > + > + /* Only some options are supported */ > + switch (optname) { > + case IP_TOS: > + *((int *)optval) = (int)inet->tos; This part is fine though, same as in do_ip_getsockopt(). > + break; > + default: > + goto err_clear; > + } > #if IS_ENABLED(CONFIG_IPV6) > } else if (level == SOL_IPV6) { > struct ipv6_pinfo *np = inet6_sk(sk); >
>>On 03/26/2018 05:36 PM, Nikita V. Shirokov wrote: >> bpf: Add sock_ops R/W access to ipv4 tos >> >> Sample usage for tos: >> >> bpf_getsockopt(skops, SOL_IP, IP_TOS, &v, sizeof(v)) >> >> where skops is a pointer to the ctx (struct bpf_sock_ops). >> >> Signed-off-by: Nikita V. Shirokov <tehnerd@fb.com> >> --- >> net/core/filter.c | 35 +++++++++++++++++++++++++++++++++++ >> 1 file changed, 35 insertions(+) >> >> diff --git a/net/core/filter.c b/net/core/filter.c >> index 00c711c..afd8255 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -3462,6 +3462,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, >> ret = -EINVAL; >> } >> #ifdef CONFIG_INET >> + } else if (level == SOL_IP) { >> + if (optlen != sizeof(int) || sk->sk_family != AF_INET) >> + return -EINVAL; >> + >> + val = *((int *)optval); >> + /* Only some options are supported */ >> + switch (optname) { >> + case IP_TOS: >> + if (val < -1 || val > 0xff) { >> + ret = -EINVAL; >> + } else { >> + struct inet_sock *inet = inet_sk(sk); >> + >> + if (val == -1) >> + val = 0; >> + inet->tos = val; > >Should this not have the exact same semantics given the helper resembles >the normal setsockopt? do_ip_setsockopt() does the following when setting >IP_TOS: > > case IP_TOS: /* This sets both TOS and Precedence */ > if (sk->sk_type == SOCK_STREAM) { > val &= ~INET_ECN_MASK; > val |= inet->tos & INET_ECN_MASK; > } > if (inet->tos != val) { > inet->tos = val; > sk->sk_priority = rt_tos2priority(val); > sk_dst_reset(sk); > } > break; > >E.g. why we don't need to set sk->sk_priority as well or reset the dst >entry here? > it feels like initially (w/ commit for IP_TOS in ip_sockglue.c) there were some usecase in mind where reflection of tos to prio was needed + some policy based routing (thats why dst_reset). but e.g. for ipv6 (IPV6_TCLASS, same as TOS but in ipv6 world) we do just this - set new tclass value and call it the day. in my opinion this aproach is more flexible (e.g. we have separate bpf_setsockopt for SOL_PRIORITY) as it did only what we want (i can imagine few usecases where we want just to change TOS w/o changing priority) however if you feel strong and want to reproduce the same behavior as in regular setsockopt, i can totally make v2. please let me know >> + } >> + break; >> + default: >> + ret = -EINVAL; >> + } >> #if IS_ENABLED(CONFIG_IPV6) >> } else if (level == SOL_IPV6) { >> if (optlen != sizeof(int) || sk->sk_family != AF_INET6) >> @@ -3561,6 +3582,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, >> } else { >> goto err_clear; >> } >> + } else if (level == SOL_IP) { >> + struct inet_sock *inet = inet_sk(sk); >> + >> + if (optlen != sizeof(int) || sk->sk_family != AF_INET) >> + goto err_clear; >> + >> + /* Only some options are supported */ >> + switch (optname) { >> + case IP_TOS: >> + *((int *)optval) = (int)inet->tos; > >This part is fine though, same as in do_ip_getsockopt(). > >> + break; >> + default: >> + goto err_clear; >> + } >> #if IS_ENABLED(CONFIG_IPV6) >> } else if (level == SOL_IPV6) { >> struct ipv6_pinfo *np = inet6_sk(sk); >>
On 03/28/2018 07:41 PM, Nikita Shirokov wrote: >>> On 03/26/2018 05:36 PM, Nikita V. Shirokov wrote: >>> bpf: Add sock_ops R/W access to ipv4 tos >>> >>> Sample usage for tos: >>> >>> bpf_getsockopt(skops, SOL_IP, IP_TOS, &v, sizeof(v)) >>> >>> where skops is a pointer to the ctx (struct bpf_sock_ops). >>> >>> Signed-off-by: Nikita V. Shirokov <tehnerd@fb.com> >>> --- >>> net/core/filter.c | 35 +++++++++++++++++++++++++++++++++++ >>> 1 file changed, 35 insertions(+) >>> >>> diff --git a/net/core/filter.c b/net/core/filter.c >>> index 00c711c..afd8255 100644 >>> --- a/net/core/filter.c >>> +++ b/net/core/filter.c >>> @@ -3462,6 +3462,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, >>> ret = -EINVAL; >>> } >>> #ifdef CONFIG_INET >>> + } else if (level == SOL_IP) { >>> + if (optlen != sizeof(int) || sk->sk_family != AF_INET) >>> + return -EINVAL; >>> + >>> + val = *((int *)optval); >>> + /* Only some options are supported */ >>> + switch (optname) { >>> + case IP_TOS: >>> + if (val < -1 || val > 0xff) { >>> + ret = -EINVAL; >>> + } else { >>> + struct inet_sock *inet = inet_sk(sk); >>> + >>> + if (val == -1) >>> + val = 0; >>> + inet->tos = val; >> >> Should this not have the exact same semantics given the helper resembles >> the normal setsockopt? do_ip_setsockopt() does the following when setting >> IP_TOS: >> >> case IP_TOS: /* This sets both TOS and Precedence */ >> if (sk->sk_type == SOCK_STREAM) { >> val &= ~INET_ECN_MASK; >> val |= inet->tos & INET_ECN_MASK; >> } >> if (inet->tos != val) { >> inet->tos = val; >> sk->sk_priority = rt_tos2priority(val); >> sk_dst_reset(sk); >> } >> break; >> >> E.g. why we don't need to set sk->sk_priority as well or reset the dst >> entry here? > > it feels like initially (w/ commit for IP_TOS in ip_sockglue.c) there were some usecase in mind > where reflection of tos to prio was needed + some policy based routing (thats why dst_reset). > but e.g. for ipv6 (IPV6_TCLASS, same as TOS but in ipv6 world) we do just this - set new tclass value > and call it the day. in my opinion this aproach is more flexible (e.g. we have separate > bpf_setsockopt for SOL_PRIORITY) as it did only what we want (i can imagine few usecases > where we want just to change TOS w/o changing priority) Ok, fair point, that way the behavior is exactly the same as in v6 case. Applied to bpf-next, thanks Nikita!
diff --git a/net/core/filter.c b/net/core/filter.c index 00c711c..afd8255 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3462,6 +3462,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, ret = -EINVAL; } #ifdef CONFIG_INET + } else if (level == SOL_IP) { + if (optlen != sizeof(int) || sk->sk_family != AF_INET) + return -EINVAL; + + val = *((int *)optval); + /* Only some options are supported */ + switch (optname) { + case IP_TOS: + if (val < -1 || val > 0xff) { + ret = -EINVAL; + } else { + struct inet_sock *inet = inet_sk(sk); + + if (val == -1) + val = 0; + inet->tos = val; + } + break; + default: + ret = -EINVAL; + } #if IS_ENABLED(CONFIG_IPV6) } else if (level == SOL_IPV6) { if (optlen != sizeof(int) || sk->sk_family != AF_INET6) @@ -3561,6 +3582,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, } else { goto err_clear; } + } else if (level == SOL_IP) { + struct inet_sock *inet = inet_sk(sk); + + if (optlen != sizeof(int) || sk->sk_family != AF_INET) + goto err_clear; + + /* Only some options are supported */ + switch (optname) { + case IP_TOS: + *((int *)optval) = (int)inet->tos; + break; + default: + goto err_clear; + } #if IS_ENABLED(CONFIG_IPV6) } else if (level == SOL_IPV6) { struct ipv6_pinfo *np = inet6_sk(sk);
bpf: Add sock_ops R/W access to ipv4 tos Sample usage for tos: bpf_getsockopt(skops, SOL_IP, IP_TOS, &v, sizeof(v)) where skops is a pointer to the ctx (struct bpf_sock_ops). Signed-off-by: Nikita V. Shirokov <tehnerd@fb.com> --- net/core/filter.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+)