diff mbox series

[bpf,2/3] bpf: Add csum_level helper for fixing up csum levels

Message ID 279ae3717cb3d03c0ffeb511493c93c450a01e1a.1591108731.git.daniel@iogearbox.net
State Accepted
Delegated to: BPF Maintainers
Headers show
Series Fix csum unnecessary on bpf_skb_adjust_room | expand

Commit Message

Daniel Borkmann June 2, 2020, 2:58 p.m. UTC
Add a bpf_csum_level() helper which BPF programs can use in combination
with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET
flag to the latter to avoid falling back to CHECKSUM_NONE.

The bpf_csum_level() helper allows adjusting skb->csum_level of CHECKSUM_UNNECESSARY
skbs via BPF_CSUM_LEVEL_{INC,DEC}, which call __skb_{incr,decr}_checksum_unnecessary()
on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET, which sets the skb's
csum to CHECKSUM_NONE, as well as a BPF_CSUM_LEVEL_QUERY to just return the
current level. Without this helper, there is no way to otherwise adjust the
skb->csum_level. I did not add an extra dummy flags argument as there is plenty of
free bit space in the level argument itself iff ever needed in the future.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h       | 43 +++++++++++++++++++++++++++++++++-
 net/core/filter.c              | 38 ++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++-
 3 files changed, 122 insertions(+), 2 deletions(-)

Comments

Lorenz Bauer June 2, 2020, 3:19 p.m. UTC | #1
On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Add a bpf_csum_level() helper which BPF programs can use in combination
> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET
> flag to the latter to avoid falling back to CHECKSUM_NONE.
>
> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels
> via BPF_CSUM_LEVEL_{INC,DEC} which calls __skb_{incr,decr}_checksum_unnecessary()
> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the skb's
> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the
> current level. Without this helper, there is no way to otherwise adjust the
> skb->csum_level. I did not add an extra dummy flags as there is plenty of free
> bitspace in level argument itself iff ever needed in future.
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  include/uapi/linux/bpf.h       | 43 +++++++++++++++++++++++++++++++++-
>  net/core/filter.c              | 38 ++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++-
>  3 files changed, 122 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3ba2bbbed80c..46622901cba7 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3220,6 +3220,38 @@ union bpf_attr {
>   *             calculation.
>   *     Return
>   *             Requested value, or 0, if flags are not recognized.
> + *
> + * int bpf_csum_level(struct sk_buff *skb, u64 level)

u64 flags? We can also stuff things into level I guess.

> + *     Description
> + *             Change the skb's checksum level by one layer up or down, or
> + *             reset it entirely to none in order to have the stack perform
> + *             checksum validation. The level is applicable to the following
> + *             protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
> + *             | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
> + *             through **bpf_skb_adjust_room**\ () helper with passing in
> + *             **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
> + *             to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
> + *             the UDP header is removed. Similarly, an encap of the latter
> + *             into the former could be accompanied by a helper call to
> + *             **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
> + *             skb is still intended to be processed in higher layers of the
> + *             stack instead of just egressing at tc.
> + *
> + *             There are four supported level settings at this time:
> + *
> + *             * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
> + *               with CHECKSUM_UNNECESSARY.
> + *             * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
> + *               with CHECKSUM_UNNECESSARY.
> + *             * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
> + *               sets CHECKSUM_NONE to force checksum validation by the stack.
> + *             * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
> + *               skb->csum_level.
> + *     Return
> + *             0 on success, or a negative error in case of failure. In the
> + *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
> + *             is returned or the error code -EACCES in case the skb is not
> + *             subject to CHECKSUM_UNNECESSARY.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -3356,7 +3388,8 @@ union bpf_attr {
>         FN(ringbuf_reserve),            \
>         FN(ringbuf_submit),             \
>         FN(ringbuf_discard),            \
> -       FN(ringbuf_query),
> +       FN(ringbuf_query),              \
> +       FN(csum_level),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> @@ -3433,6 +3466,14 @@ enum {
>         BPF_F_CURRENT_NETNS             = (-1L),
>  };
>
> +/* BPF_FUNC_csum_level level values. */
> +enum {
> +       BPF_CSUM_LEVEL_QUERY,
> +       BPF_CSUM_LEVEL_INC,
> +       BPF_CSUM_LEVEL_DEC,
> +       BPF_CSUM_LEVEL_RESET,
> +};
> +
>  /* BPF_FUNC_skb_adjust_room flags. */
>  enum {
>         BPF_F_ADJ_ROOM_FIXED_GSO        = (1ULL << 0),
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 278dcc0af961..d01a244b5087 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2015,6 +2015,40 @@ static const struct bpf_func_proto bpf_csum_update_proto = {
>         .arg2_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
> +{
> +       /* The interface is to be used in combination with bpf_skb_adjust_room()
> +        * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
> +        * is passed as flags, for example.
> +        */
> +       switch (level) {
> +       case BPF_CSUM_LEVEL_INC:
> +               __skb_incr_checksum_unnecessary(skb);
> +               break;
> +       case BPF_CSUM_LEVEL_DEC:
> +               __skb_decr_checksum_unnecessary(skb);
> +               break;
> +       case BPF_CSUM_LEVEL_RESET:
> +               __skb_reset_checksum_unnecessary(skb);
> +               break;
> +       case BPF_CSUM_LEVEL_QUERY:
> +               return skb->ip_summed == CHECKSUM_UNNECESSARY ?
> +                      skb->csum_level : -EACCES;
> +       default:
> +               return -EINVAL;
> +       }
> +
> +       return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_csum_level_proto = {
> +       .func           = bpf_csum_level,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_CTX,
> +       .arg2_type      = ARG_ANYTHING,
> +};
> +
>  static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
>  {
>         return dev_forward_skb(dev, skb);
> @@ -6280,6 +6314,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_csum_diff_proto;
>         case BPF_FUNC_csum_update:
>                 return &bpf_csum_update_proto;
> +       case BPF_FUNC_csum_level:
> +               return &bpf_csum_level_proto;
>         case BPF_FUNC_l3_csum_replace:
>                 return &bpf_l3_csum_replace_proto;
>         case BPF_FUNC_l4_csum_replace:
> @@ -6613,6 +6649,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_skb_store_bytes_proto;
>         case BPF_FUNC_csum_update:
>                 return &bpf_csum_update_proto;
> +       case BPF_FUNC_csum_level:
> +               return &bpf_csum_level_proto;
>         case BPF_FUNC_l3_csum_replace:
>                 return &bpf_l3_csum_replace_proto;
>         case BPF_FUNC_l4_csum_replace:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 3ba2bbbed80c..46622901cba7 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3220,6 +3220,38 @@ union bpf_attr {
>   *             calculation.
>   *     Return
>   *             Requested value, or 0, if flags are not recognized.
> + *
> + * int bpf_csum_level(struct sk_buff *skb, u64 level)
> + *     Description
> + *             Change the skb's checksum level by one layer up or down, or
> + *             reset it entirely to none in order to have the stack perform
> + *             checksum validation. The level is applicable to the following
> + *             protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
> + *             | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
> + *             through **bpf_skb_adjust_room**\ () helper with passing in
> + *             **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
> + *             to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
> + *             the UDP header is removed. Similarly, an encap of the latter
> + *             into the former could be accompanied by a helper call to
> + *             **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
> + *             skb is still intended to be processed in higher layers of the
> + *             stack instead of just egressing at tc.
> + *
> + *             There are four supported level settings at this time:
> + *
> + *             * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
> + *               with CHECKSUM_UNNECESSARY.
> + *             * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
> + *               with CHECKSUM_UNNECESSARY.
> + *             * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
> + *               sets CHECKSUM_NONE to force checksum validation by the stack.
> + *             * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
> + *               skb->csum_level.
> + *     Return
> + *             0 on success, or a negative error in case of failure. In the
> + *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
> + *             is returned or the error code -EACCES in case the skb is not
> + *             subject to CHECKSUM_UNNECESSARY.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -3356,7 +3388,8 @@ union bpf_attr {
>         FN(ringbuf_reserve),            \
>         FN(ringbuf_submit),             \
>         FN(ringbuf_discard),            \
> -       FN(ringbuf_query),
> +       FN(ringbuf_query),              \
> +       FN(csum_level),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> @@ -3433,6 +3466,14 @@ enum {
>         BPF_F_CURRENT_NETNS             = (-1L),
>  };
>
> +/* BPF_FUNC_csum_level level values. */
> +enum {
> +       BPF_CSUM_LEVEL_QUERY,
> +       BPF_CSUM_LEVEL_INC,
> +       BPF_CSUM_LEVEL_DEC,
> +       BPF_CSUM_LEVEL_RESET,
> +};
> +
>  /* BPF_FUNC_skb_adjust_room flags. */
>  enum {
>         BPF_F_ADJ_ROOM_FIXED_GSO        = (1ULL << 0),
> --
> 2.21.0
>

Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Daniel Borkmann June 2, 2020, 3:35 p.m. UTC | #2
On 6/2/20 5:19 PM, Lorenz Bauer wrote:
> On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote:
>>
>> Add a bpf_csum_level() helper which BPF programs can use in combination
>> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET
>> flag to the latter to avoid falling back to CHECKSUM_NONE.
>>
>> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels
>> via BPF_CSUM_LEVEL_{INC,DEC} which calls __skb_{incr,decr}_checksum_unnecessary()
>> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the skb's
>> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the
>> current level. Without this helper, there is no way to otherwise adjust the
>> skb->csum_level. I did not add an extra dummy flags as there is plenty of free
>> bitspace in level argument itself iff ever needed in future.
>>
>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
>> ---
>>   include/uapi/linux/bpf.h       | 43 +++++++++++++++++++++++++++++++++-
>>   net/core/filter.c              | 38 ++++++++++++++++++++++++++++++
>>   tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++-
>>   3 files changed, 122 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 3ba2bbbed80c..46622901cba7 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3220,6 +3220,38 @@ union bpf_attr {
>>    *             calculation.
>>    *     Return
>>    *             Requested value, or 0, if flags are not recognized.
>> + *
>> + * int bpf_csum_level(struct sk_buff *skb, u64 level)
> 
> u64 flags? We can also stuff things into level I guess.

Yeah, I did mention it in the commit log. There is plenty of bit space to extend
with flags in there iff ever needed. Originally, helper was called bpf_csum_adjust()
but then renamed into bpf_csum_level() to be more 'topic specific' (aka do one thing
and do it well...) and avoid future api overloading, so if necessary level can be
used since I don't think the enum will be extended much further from what we have
here anyway.

[...]
> 
> Acked-by: Lorenz Bauer <lmb@cloudflare.com>

Thanks!
Alan Maguire June 2, 2020, 4:41 p.m. UTC | #3
On Tue, 2 Jun 2020, Daniel Borkmann wrote:

> On 6/2/20 5:19 PM, Lorenz Bauer wrote:
> > On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote:
> >>
> >> Add a bpf_csum_level() helper which BPF programs can use in combination
> >> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET
> >> flag to the latter to avoid falling back to CHECKSUM_NONE.
> >>
> >> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels
> >> via BPF_CSUM_LEVEL_{INC,DEC} which calls
> >> __skb_{incr,decr}_checksum_unnecessary()
> >> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the
> >> skb's
> >> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the
> >> current level. Without this helper, there is no way to otherwise adjust the
> >> skb->csum_level. I did not add an extra dummy flags as there is plenty of
> >> free
> >> bitspace in level argument itself iff ever needed in future.
> >>
> >> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> >> ---
> >>   include/uapi/linux/bpf.h       | 43 +++++++++++++++++++++++++++++++++-
> >>   net/core/filter.c              | 38 ++++++++++++++++++++++++++++++
> >>   tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++-
> >>   3 files changed, 122 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >> index 3ba2bbbed80c..46622901cba7 100644
> >> --- a/include/uapi/linux/bpf.h
> >> +++ b/include/uapi/linux/bpf.h
> >> @@ -3220,6 +3220,38 @@ union bpf_attr {
> >>    *             calculation.
> >>    *     Return
> >>    *             Requested value, or 0, if flags are not recognized.
> >> + *
> >> + * int bpf_csum_level(struct sk_buff *skb, u64 level)
> > 
> > u64 flags? We can also stuff things into level I guess.
> 
> Yeah, I did mention it in the commit log. There is plenty of bit space to
> extend
> with flags in there iff ever needed. Originally, helper was called
> bpf_csum_adjust()
> but then renamed into bpf_csum_level() to be more 'topic specific' (aka do one
> thing
> and do it well...) and avoid future api overloading, so if necessary level can
> be
> used since I don't think the enum will be extended much further from what we
> have
> here anyway.
> 
> [...]
> > 
> > Acked-by: Lorenz Bauer <lmb@cloudflare.com>
>

Looks great! The only thing that gave me pause was
the -EACCES return value for the case where we query
and the skb is not subject to CHECKSUM_UNNECESSARY;
-ENOENT ("no such level") feels slightly closer to the
situation to me but either is a reasonable choice I think.

Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Daniel Borkmann June 2, 2020, 5:43 p.m. UTC | #4
On 6/2/20 6:41 PM, Alan Maguire wrote:
> On Tue, 2 Jun 2020, Daniel Borkmann wrote:
>> On 6/2/20 5:19 PM, Lorenz Bauer wrote:
>>> On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote:
>>>>
>>>> Add a bpf_csum_level() helper which BPF programs can use in combination
>>>> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET
>>>> flag to the latter to avoid falling back to CHECKSUM_NONE.
>>>>
>>>> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels
>>>> via BPF_CSUM_LEVEL_{INC,DEC} which calls
>>>> __skb_{incr,decr}_checksum_unnecessary()
>>>> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the
>>>> skb's
>>>> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the
>>>> current level. Without this helper, there is no way to otherwise adjust the
>>>> skb->csum_level. I did not add an extra dummy flags as there is plenty of
>>>> free
>>>> bitspace in level argument itself iff ever needed in future.
>>>>
>>>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
>>>> ---
>>>>    include/uapi/linux/bpf.h       | 43 +++++++++++++++++++++++++++++++++-
>>>>    net/core/filter.c              | 38 ++++++++++++++++++++++++++++++
>>>>    tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++-
>>>>    3 files changed, 122 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>>> index 3ba2bbbed80c..46622901cba7 100644
>>>> --- a/include/uapi/linux/bpf.h
>>>> +++ b/include/uapi/linux/bpf.h
>>>> @@ -3220,6 +3220,38 @@ union bpf_attr {
>>>>     *             calculation.
>>>>     *     Return
>>>>     *             Requested value, or 0, if flags are not recognized.
>>>> + *
>>>> + * int bpf_csum_level(struct sk_buff *skb, u64 level)
>>>
>>> u64 flags? We can also stuff things into level I guess.
>>
>> Yeah, I did mention it in the commit log. There is plenty of bit space to
>> extend
>> with flags in there iff ever needed. Originally, helper was called
>> bpf_csum_adjust()
>> but then renamed into bpf_csum_level() to be more 'topic specific' (aka do one
>> thing
>> and do it well...) and avoid future api overloading, so if necessary level can
>> be
>> used since I don't think the enum will be extended much further from what we
>> have
>> here anyway.
>>
>> [...]
>>>
>>> Acked-by: Lorenz Bauer <lmb@cloudflare.com>
> 
> Looks great! The only thing that gave me pause was
> the -EACCES return value for the case where we query
> and the skb is not subject to CHECKSUM_UNNECESSARY;
> -ENOENT ("no such level") feels slightly closer to the
> situation to me but either is a reasonable choice I think.

My thinking was along the lines of 'error since we cannot access skb->csum_level
for the given skb->ip_summed'. I don't feel strongly about which code it is either
way though; the important thing is that it is documented & distinguishable from
other errors, so that the program has a way to make sense of the data returned
by BPF_CSUM_LEVEL_QUERY.

> Reviewed-by: Alan Maguire <alan.maguire@oracle.com>

Thanks!
Daniel
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3ba2bbbed80c..46622901cba7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3220,6 +3220,38 @@  union bpf_attr {
  *		calculation.
  *	Return
  *		Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * 	Description
+ * 		Change the skb's checksum level by one layer up or down, or
+ * 		reset it entirely to none in order to have the stack perform
+ * 		checksum validation. The level is applicable to the following
+ * 		protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * 		| ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * 		through **bpf_skb_adjust_room**\ () helper with passing in
+ * 		**BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * 		to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * 		the UDP header is removed. Similarly, an encap of the latter
+ * 		into the former could be accompanied by a helper call to
+ * 		**bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * 		skb is still intended to be processed in higher layers of the
+ * 		stack instead of just egressing at tc.
+ *
+ * 		There are four supported level settings at this time:
+ *
+ * 		* **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * 		  sets CHECKSUM_NONE to force checksum validation by the stack.
+ * 		* **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * 		  skb->csum_level.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure. In the
+ * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * 		is returned or the error code -EACCES in case the skb is not
+ * 		subject to CHECKSUM_UNNECESSARY.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3356,7 +3388,8 @@  union bpf_attr {
 	FN(ringbuf_reserve),		\
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
-	FN(ringbuf_query),
+	FN(ringbuf_query),		\
+	FN(csum_level),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3433,6 +3466,14 @@  enum {
 	BPF_F_CURRENT_NETNS		= (-1L),
 };
 
+/* BPF_FUNC_csum_level level values. */
+enum {
+	BPF_CSUM_LEVEL_QUERY,
+	BPF_CSUM_LEVEL_INC,
+	BPF_CSUM_LEVEL_DEC,
+	BPF_CSUM_LEVEL_RESET,
+};
+
 /* BPF_FUNC_skb_adjust_room flags. */
 enum {
 	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
diff --git a/net/core/filter.c b/net/core/filter.c
index 278dcc0af961..d01a244b5087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2015,6 +2015,40 @@  static const struct bpf_func_proto bpf_csum_update_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
+{
+	/* The interface is to be used in combination with bpf_skb_adjust_room()
+	 * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
+	 * is passed as flags, for example.
+	 */
+	switch (level) {
+	case BPF_CSUM_LEVEL_INC:
+		__skb_incr_checksum_unnecessary(skb);
+		break;
+	case BPF_CSUM_LEVEL_DEC:
+		__skb_decr_checksum_unnecessary(skb);
+		break;
+	case BPF_CSUM_LEVEL_RESET:
+		__skb_reset_checksum_unnecessary(skb);
+		break;
+	case BPF_CSUM_LEVEL_QUERY:
+		return skb->ip_summed == CHECKSUM_UNNECESSARY ?
+		       skb->csum_level : -EACCES;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_csum_level_proto = {
+	.func		= bpf_csum_level,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
 {
 	return dev_forward_skb(dev, skb);
@@ -6280,6 +6314,8 @@  tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_csum_diff_proto;
 	case BPF_FUNC_csum_update:
 		return &bpf_csum_update_proto;
+	case BPF_FUNC_csum_level:
+		return &bpf_csum_level_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
@@ -6613,6 +6649,8 @@  lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_store_bytes_proto;
 	case BPF_FUNC_csum_update:
 		return &bpf_csum_update_proto;
+	case BPF_FUNC_csum_level:
+		return &bpf_csum_level_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3ba2bbbed80c..46622901cba7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3220,6 +3220,38 @@  union bpf_attr {
  *		calculation.
  *	Return
  *		Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * 	Description
+ * 		Change the skb's checksum level by one layer up or down, or
+ * 		reset it entirely to none in order to have the stack perform
+ * 		checksum validation. The level is applicable to the following
+ * 		protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * 		| ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * 		through **bpf_skb_adjust_room**\ () helper with passing in
+ * 		**BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * 		to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * 		the UDP header is removed. Similarly, an encap of the latter
+ * 		into the former could be accompanied by a helper call to
+ * 		**bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * 		skb is still intended to be processed in higher layers of the
+ * 		stack instead of just egressing at tc.
+ *
+ * 		There are four supported level settings at this time:
+ *
+ * 		* **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * 		  sets CHECKSUM_NONE to force checksum validation by the stack.
+ * 		* **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * 		  skb->csum_level.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure. In the
+ * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * 		is returned or the error code -EACCES in case the skb is not
+ * 		subject to CHECKSUM_UNNECESSARY.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3356,7 +3388,8 @@  union bpf_attr {
 	FN(ringbuf_reserve),		\
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
-	FN(ringbuf_query),
+	FN(ringbuf_query),		\
+	FN(csum_level),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3433,6 +3466,14 @@  enum {
 	BPF_F_CURRENT_NETNS		= (-1L),
 };
 
+/* BPF_FUNC_csum_level level values. */
+enum {
+	BPF_CSUM_LEVEL_QUERY,
+	BPF_CSUM_LEVEL_INC,
+	BPF_CSUM_LEVEL_DEC,
+	BPF_CSUM_LEVEL_RESET,
+};
+
 /* BPF_FUNC_skb_adjust_room flags. */
 enum {
 	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),