Message ID | 20191021105938.11820-1-bjorn.topel@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next,v2] libbpf: use implicit XSKMAP lookup from AF_XDP XDP program | expand |
Björn Töpel <bjorn.topel@gmail.com> writes: > From: Björn Töpel <bjorn.topel@intel.com> > > In commit 43e74c0267a3 ("bpf_xdp_redirect_map: Perform map lookup in > eBPF helper") the bpf_redirect_map() helper learned to do map lookup, > which means that the explicit lookup in the XDP program for AF_XDP is > not needed for post-5.3 kernels. > > This commit adds the implicit map lookup with default action, which > improves the performance for the "rx_drop" [1] scenario with ~4%. > > For pre-5.3 kernels, the bpf_redirect_map() returns XDP_ABORTED, and a > fallback path for backward compatibility is entered, where explicit > lookup is still performed. This means a slight regression for older > kernels (an additional bpf_redirect_map() call), but I consider that a > fair punishment for users not upgrading their kernels. ;-) > > v1->v2: Backward compatibility (Toke) [2] > > [1] # xdpsock -i eth0 -z -r > [2] https://lore.kernel.org/bpf/87pnirb3dc.fsf@toke.dk/ > > Suggested-by: Toke Høiland-Jørgensen <toke@redhat.com> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com> > --- > tools/lib/bpf/xsk.c | 45 +++++++++++++++++++++++++++++++++++---------- > 1 file changed, 35 insertions(+), 10 deletions(-) > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c > index b0f532544c91..391a126b3fd8 100644 > --- a/tools/lib/bpf/xsk.c > +++ b/tools/lib/bpf/xsk.c > @@ -274,33 +274,58 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) > /* This is the C-program: > * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) > * { > - * int index = ctx->rx_queue_index; > + * int ret, index = ctx->rx_queue_index; > * > * // A set entry here means that the correspnding queue_id > * // has an active AF_XDP socket bound to it. > + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); > + * ret &= XDP_PASS | XDP_REDIRECT; Why the masking? Looks a bit weird (XDP return codes are not defined as bitmask values), and it's not really needed, is it? -Toke
On Mon, 21 Oct 2019 at 13:50, Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > Björn Töpel <bjorn.topel@gmail.com> writes: > > > From: Björn Töpel <bjorn.topel@intel.com> > > > > In commit 43e74c0267a3 ("bpf_xdp_redirect_map: Perform map lookup in > > eBPF helper") the bpf_redirect_map() helper learned to do map lookup, > > which means that the explicit lookup in the XDP program for AF_XDP is > > not needed for post-5.3 kernels. > > > > This commit adds the implicit map lookup with default action, which > > improves the performance for the "rx_drop" [1] scenario with ~4%. > > > > For pre-5.3 kernels, the bpf_redirect_map() returns XDP_ABORTED, and a > > fallback path for backward compatibility is entered, where explicit > > lookup is still performed. This means a slight regression for older > > kernels (an additional bpf_redirect_map() call), but I consider that a > > fair punishment for users not upgrading their kernels. ;-) > > > > v1->v2: Backward compatibility (Toke) [2] > > > > [1] # xdpsock -i eth0 -z -r > > [2] https://lore.kernel.org/bpf/87pnirb3dc.fsf@toke.dk/ > > > > Suggested-by: Toke Høiland-Jørgensen <toke@redhat.com> > > Signed-off-by: Björn Töpel <bjorn.topel@intel.com> > > --- > > tools/lib/bpf/xsk.c | 45 +++++++++++++++++++++++++++++++++++---------- > > 1 file changed, 35 insertions(+), 10 deletions(-) > > > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c > > index b0f532544c91..391a126b3fd8 100644 > > --- a/tools/lib/bpf/xsk.c > > +++ b/tools/lib/bpf/xsk.c > > @@ -274,33 +274,58 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) > > /* This is the C-program: > > * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) > > * { > > - * int index = ctx->rx_queue_index; > > + * int ret, index = ctx->rx_queue_index; > > * > > * // A set entry here means that the correspnding queue_id > > * // has an active AF_XDP socket bound to it. > > + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); > > + * ret &= XDP_PASS | XDP_REDIRECT; > > Why the masking? Looks a bit weird (XDP return codes are not defined as > bitmask values), and it's not really needed, is it? > bpf_redirect_map() returns a 32-bit signed int, so the upper 32-bit will need to be cleared. Having an explicit AND is one instruction less than two shifts. So, it's an optimization (every instruction is sacred). Compare these two: 0000000000000000 xdp_sock_prog: ; int ret, index = ctx->rx_queue_index; 0: 61 12 10 00 00 00 00 00 r2 = *(u32 *)(r1 + 16) 1: 63 2a fc ff 00 00 00 00 *(u32 *)(r10 - 4) = r2 ; ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); 2: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll 4: b7 03 00 00 02 00 00 00 r3 = 2 5: 85 00 00 00 33 00 00 00 call 51 ; ret &= XDP_PASS | XDP_REDIRECT; 6: 57 00 00 00 06 00 00 00 r0 &= 6 ; if (ret) 7: 55 00 0d 00 00 00 00 00 if r0 != 0 goto +13 <LBB0_3> 8: bf a2 00 00 00 00 00 00 r2 = r10 ; if (bpf_map_lookup_elem(&xsks_map, &index)) 9: 07 02 00 00 fc ff ff ff r2 += -4 10: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll 12: 85 00 00 00 01 00 00 00 call 1 13: bf 01 00 00 00 00 00 00 r1 = r0 14: b7 00 00 00 02 00 00 00 r0 = 2 15: 15 01 05 00 00 00 00 00 if r1 == 0 goto +5 <LBB0_3> ; return bpf_redirect_map(&xsks_map, index, 0); 16: 61 a2 fc ff 00 00 00 00 r2 = *(u32 *)(r10 - 4) 17: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll 19: b7 03 00 00 00 00 00 00 r3 = 0 20: 85 00 00 00 33 00 00 00 call 51 00000000000000a8 LBB0_3: ; } 21: 95 00 00 00 00 00 00 00 exit Disassembly of section xdp_sock: 0000000000000000 xdp_sock_prog: ; int ret, index = ctx->rx_queue_index; 0: 61 12 10 00 00 00 00 00 r2 = *(u32 *)(r1 + 16) 1: 63 2a fc ff 00 00 00 00 *(u32 *)(r10 - 4) = r2 ; ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); 2: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll 4: b7 03 00 00 02 00 00 00 r3 = 2 5: 85 00 00 00 33 00 00 00 call 51 6: 67 00 00 00 20 00 00 00 r0 <<= 32 7: c7 00 00 00 20 00 00 00 r0 s>>= 32 ; if (ret > 0) 8: 65 00 0d 00 00 00 00 00 if r0 s> 0 goto +13 <LBB0_3> 9: bf a2 00 00 00 00 00 00 r2 = r10 ; if (bpf_map_lookup_elem(&xsks_map, &index)) 10: 07 02 00 00 fc ff ff ff r2 += -4 11: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll 13: 85 00 00 00 01 00 00 00 call 1 14: bf 01 00 00 00 00 00 00 r1 = r0 15: b7 00 00 00 02 00 00 00 r0 = 2 16: 15 01 05 00 00 00 00 00 if r1 == 0 goto +5 <LBB0_3> ; return bpf_redirect_map(&xsks_map, index, 0); 17: 61 a2 fc ff 00 00 00 00 r2 = *(u32 *)(r10 - 4) 18: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll 20: b7 03 00 00 00 00 00 00 r3 = 0 21: 85 00 00 00 33 00 00 00 call 51 00000000000000b0 LBB0_3: ; } 22: 95 00 00 00 00 00 00 00 exit Björn > -Toke >
Björn Töpel <bjorn.topel@gmail.com> writes: > On Mon, 21 Oct 2019 at 13:50, Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> >> Björn Töpel <bjorn.topel@gmail.com> writes: >> >> > From: Björn Töpel <bjorn.topel@intel.com> >> > >> > In commit 43e74c0267a3 ("bpf_xdp_redirect_map: Perform map lookup in >> > eBPF helper") the bpf_redirect_map() helper learned to do map lookup, >> > which means that the explicit lookup in the XDP program for AF_XDP is >> > not needed for post-5.3 kernels. >> > >> > This commit adds the implicit map lookup with default action, which >> > improves the performance for the "rx_drop" [1] scenario with ~4%. >> > >> > For pre-5.3 kernels, the bpf_redirect_map() returns XDP_ABORTED, and a >> > fallback path for backward compatibility is entered, where explicit >> > lookup is still performed. This means a slight regression for older >> > kernels (an additional bpf_redirect_map() call), but I consider that a >> > fair punishment for users not upgrading their kernels. ;-) >> > >> > v1->v2: Backward compatibility (Toke) [2] >> > >> > [1] # xdpsock -i eth0 -z -r >> > [2] https://lore.kernel.org/bpf/87pnirb3dc.fsf@toke.dk/ >> > >> > Suggested-by: Toke Høiland-Jørgensen <toke@redhat.com> >> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com> >> > --- >> > tools/lib/bpf/xsk.c | 45 +++++++++++++++++++++++++++++++++++---------- >> > 1 file changed, 35 insertions(+), 10 deletions(-) >> > >> > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c >> > index b0f532544c91..391a126b3fd8 100644 >> > --- a/tools/lib/bpf/xsk.c >> > +++ b/tools/lib/bpf/xsk.c >> > @@ -274,33 +274,58 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) >> > /* This is the C-program: >> > * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) >> > * { >> > - * int index = ctx->rx_queue_index; >> > + * int ret, index = ctx->rx_queue_index; >> > * >> > * // A set entry here means that the correspnding queue_id >> > * // has an active AF_XDP socket bound to it. >> > + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); >> > + * ret &= XDP_PASS | XDP_REDIRECT; >> >> Why the masking? Looks a bit weird (XDP return codes are not defined as >> bitmask values), and it's not really needed, is it? >> > > bpf_redirect_map() returns a 32-bit signed int, so the upper 32-bit > will need to be cleared. Having an explicit AND is one instruction > less than two shifts. So, it's an optimization (every instruction is > sacred). OIC. Well, a comment explaining that might be nice (since you're doing per-instruction comments anyway)? :) -Toke
On Mon, 21 Oct 2019 at 14:19, Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > Björn Töpel <bjorn.topel@gmail.com> writes: > [...] > > > > bpf_redirect_map() returns a 32-bit signed int, so the upper 32-bit > > will need to be cleared. Having an explicit AND is one instruction > > less than two shifts. So, it's an optimization (every instruction is > > sacred). > > OIC. Well, a comment explaining that might be nice (since you're doing > per-instruction comments anyway)? :) > Sure, I can do a v3 with a comment, unless someone has a better idea avoiding both shifts and AND. Thanks for taking a look! Björn > -Toke >
On Mon, 21 Oct 2019 at 15:37, Björn Töpel <bjorn.topel@gmail.com> wrote: > > On Mon, 21 Oct 2019 at 14:19, Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > > > Björn Töpel <bjorn.topel@gmail.com> writes: > > > [...] > > > > > > bpf_redirect_map() returns a 32-bit signed int, so the upper 32-bit > > > will need to be cleared. Having an explicit AND is one instruction > > > less than two shifts. So, it's an optimization (every instruction is > > > sacred). > > > > OIC. Well, a comment explaining that might be nice (since you're doing > > per-instruction comments anyway)? :) > > > > Sure, I can do a v3 with a comment, unless someone has a better idea > avoiding both shifts and AND. > > Thanks for taking a look! > Now wait, there are the JMP32 instructions that Jiong added. So, shifts/AND can be avoided. Now, regarding backward compat... JMP32 is pretty new. I need to think a bit how to approach this. I mean, I'd like to be able to use new BPF instructions. Björn > > Björn > > > > -Toke > >
Björn Töpel <bjorn.topel@gmail.com> writes: > On Mon, 21 Oct 2019 at 15:37, Björn Töpel <bjorn.topel@gmail.com> wrote: >> >> On Mon, 21 Oct 2019 at 14:19, Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> > >> > Björn Töpel <bjorn.topel@gmail.com> writes: >> > >> [...] >> > > >> > > bpf_redirect_map() returns a 32-bit signed int, so the upper 32-bit >> > > will need to be cleared. Having an explicit AND is one instruction >> > > less than two shifts. So, it's an optimization (every instruction is >> > > sacred). >> > >> > OIC. Well, a comment explaining that might be nice (since you're doing >> > per-instruction comments anyway)? :) >> > >> >> Sure, I can do a v3 with a comment, unless someone has a better idea >> avoiding both shifts and AND. >> >> Thanks for taking a look! >> > > Now wait, there are the JMP32 instructions that Jiong added. So, > shifts/AND can be avoided. Now, regarding backward compat... JMP32 is > pretty new. I need to think a bit how to approach this. I mean, I'd > like to be able to use new BPF instructions. Well, they went into kernel 5.1 AFAICT; does AF_XDP even work properly in kernels older than that? For the xdp-tutorial we've just been telling people to upgrade their kernels to use it (see, e.g., https://github.com/xdp-project/xdp-tutorial/issues/76). -Toke
On Mon, 21 Oct 2019 at 17:43, Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > Björn Töpel <bjorn.topel@gmail.com> writes: > > > On Mon, 21 Oct 2019 at 15:37, Björn Töpel <bjorn.topel@gmail.com> wrote: > >> > >> On Mon, 21 Oct 2019 at 14:19, Toke Høiland-Jørgensen <toke@redhat.com> wrote: > >> > > >> > Björn Töpel <bjorn.topel@gmail.com> writes: > >> > > >> [...] > >> > > > >> > > bpf_redirect_map() returns a 32-bit signed int, so the upper 32-bit > >> > > will need to be cleared. Having an explicit AND is one instruction > >> > > less than two shifts. So, it's an optimization (every instruction is > >> > > sacred). > >> > > >> > OIC. Well, a comment explaining that might be nice (since you're doing > >> > per-instruction comments anyway)? :) > >> > > >> > >> Sure, I can do a v3 with a comment, unless someone has a better idea > >> avoiding both shifts and AND. > >> > >> Thanks for taking a look! > >> > > > > Now wait, there are the JMP32 instructions that Jiong added. So, > > shifts/AND can be avoided. Now, regarding backward compat... JMP32 is > > pretty new. I need to think a bit how to approach this. I mean, I'd > > like to be able to use new BPF instructions. > > Well, they went into kernel 5.1 AFAICT; does AF_XDP even work properly > in kernels older than that? For the xdp-tutorial we've just been telling > people to upgrade their kernels to use it (see, e.g., > https://github.com/xdp-project/xdp-tutorial/issues/76). > Yeah, let's take that route, i.e. using JMP32 and one program. One could argue that libbpf could do runtime checks and load the simpler program w/o the fallback for post-5.3 only, and avoiding the branching all together. Björn > -Toke >
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index b0f532544c91..391a126b3fd8 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -274,33 +274,58 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* This is the C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { - * int index = ctx->rx_queue_index; + * int ret, index = ctx->rx_queue_index; * * // A set entry here means that the correspnding queue_id * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * ret &= XDP_PASS | XDP_REDIRECT; + * if (ret) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. * if (bpf_map_lookup_elem(&xsks_map, &index)) * return bpf_redirect_map(&xsks_map, index, 0); - * * return XDP_PASS; * } */ struct bpf_insn prog[] = { - /* r1 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r1 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* r0 &= XDP_PASS | XDP_REDIRECT */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, XDP_PASS | XDP_REDIRECT), + /* if r0 != 0 goto pc+13 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 13), + /* r2 = r10 */ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* call bpf_map_lookup_elem */ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV32_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto +5 */ + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), /* r2 = *(u32 *)(r10 - 4) */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - BPF_MOV32_IMM(BPF_REG_3, 0), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ BPF_EMIT_CALL(BPF_FUNC_redirect_map), /* The jumps are to this instruction */ BPF_EXIT_INSN(),