diff mbox series

arm64: bpf: Fix branch offset in JIT

Message ID 20200914083622.116554-1-ilias.apalodimas@linaro.org
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series arm64: bpf: Fix branch offset in JIT | expand

Commit Message

Ilias Apalodimas Sept. 14, 2020, 8:36 a.m. UTC
Running the eBPF test_verifier leads to random errors looking like this:

[ 6525.735488] Unexpected kernel BRK exception at EL1
[ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
[ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x
[ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G        W         5.9.0-rc1+ #47
[ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun  6 2020
[ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
[ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
[ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
[ 6525.820832] sp : ffff8000130cbb80
[ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000
[ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000
[ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038
[ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080
[ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000
[ 6525.850682] x19: 0000000000000000 x18: 0000000000000000
[ 6525.855990] x17: 0000000000000000 x16: 0000000000000000
[ 6525.861298] x15: 0000000000000000 x14: 0000000000000000
[ 6525.866606] x13: 0000000000000000 x12: 0000000000000000
[ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c
[ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38
[ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881
[ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f
[ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374
[ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009
[ 6525.903760] Call trace:
[ 6525.906202]  bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
[ 6525.911076]  bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
[ 6525.915957]  bpf_dispatcher_xdp_func+0x14/0x20
[ 6525.920398]  bpf_test_run+0x70/0x1b0
[ 6525.923969]  bpf_prog_test_run_xdp+0xec/0x190
[ 6525.928326]  __do_sys_bpf+0xc88/0x1b28
[ 6525.932072]  __arm64_sys_bpf+0x24/0x30
[ 6525.935820]  el0_svc_common.constprop.0+0x70/0x168
[ 6525.940607]  do_el0_svc+0x28/0x88
[ 6525.943920]  el0_sync_handler+0x88/0x190
[ 6525.947838]  el0_sync+0x140/0x180
[ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
[ 6525.957249] ---[ end trace cecc3f93b14927e2 ]---

The reason seems to be the creation and usage of ctx->offset[]
while building the eBPF body.  The code currently omits the first 
instruction, since build_insn() will increase our ctx->idx before saving 
it.  When "taken loop with back jump to 1st insn" test runs it will
eventually call bpf2a64_offset(-1, 2, ctx). Since negative indexing is
permitted, the current outcome depends on the value stored in
ctx->offset[-1], which has nothing to do with our array.
If the value happens to be 0 the tests will work. If not this error
triggers.

So let's fix it by creating the ctx->offset[] correctly in the first
place and account for the extra instruction while calculating the arm
instruction offsets.

Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
---
 arch/arm64/net/bpf_jit_comp.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

Comments

Will Deacon Sept. 14, 2020, 12:20 p.m. UTC | #1
On Mon, Sep 14, 2020 at 11:36:21AM +0300, Ilias Apalodimas wrote:
> Running the eBPF test_verifier leads to random errors looking like this:
> 
> [ 6525.735488] Unexpected kernel BRK exception at EL1
> [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
> [ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x
> [ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G        W         5.9.0-rc1+ #47
> [ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun  6 2020
> [ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
> [ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
> [ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
> [ 6525.820832] sp : ffff8000130cbb80
> [ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000
> [ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000
> [ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038
> [ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080
> [ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000
> [ 6525.850682] x19: 0000000000000000 x18: 0000000000000000
> [ 6525.855990] x17: 0000000000000000 x16: 0000000000000000
> [ 6525.861298] x15: 0000000000000000 x14: 0000000000000000
> [ 6525.866606] x13: 0000000000000000 x12: 0000000000000000
> [ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c
> [ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38
> [ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881
> [ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f
> [ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374
> [ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009
> [ 6525.903760] Call trace:
> [ 6525.906202]  bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
> [ 6525.911076]  bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
> [ 6525.915957]  bpf_dispatcher_xdp_func+0x14/0x20
> [ 6525.920398]  bpf_test_run+0x70/0x1b0
> [ 6525.923969]  bpf_prog_test_run_xdp+0xec/0x190
> [ 6525.928326]  __do_sys_bpf+0xc88/0x1b28
> [ 6525.932072]  __arm64_sys_bpf+0x24/0x30
> [ 6525.935820]  el0_svc_common.constprop.0+0x70/0x168
> [ 6525.940607]  do_el0_svc+0x28/0x88
> [ 6525.943920]  el0_sync_handler+0x88/0x190
> [ 6525.947838]  el0_sync+0x140/0x180
> [ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
> [ 6525.957249] ---[ end trace cecc3f93b14927e2 ]---
> 
> The reason seems to be the offset[] creation and usage ctx->offset[]

"seems to be"? Are you unsure?

> while building the eBPF body.  The code currently omits the first 
> instruction, since build_insn() will increase our ctx->idx before saving 
> it.  When "taken loop with back jump to 1st insn" test runs it will
> eventually call bpf2a64_offset(-1, 2, ctx). Since negative indexing is
> permitted, the current outcome depends on the value stored in
> ctx->offset[-1], which has nothing to do with our array.
> If the value happens to be 0 the tests will work. If not this error
> triggers.
> 
> So let's fix it by creating the ctx->offset[] correctly in the first
> place and account for the extra instruction while calculating the arm
> instruction offsets.

No Fixes: tag?

> Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>

Non-author signoffs here. What's going on?

Will
Ilias Apalodimas Sept. 14, 2020, 12:35 p.m. UTC | #2
On Mon, Sep 14, 2020 at 01:20:43PM +0100, Will Deacon wrote:
> On Mon, Sep 14, 2020 at 11:36:21AM +0300, Ilias Apalodimas wrote:
> > Running the eBPF test_verifier leads to random errors looking like this:
> > 
> > [ 6525.735488] Unexpected kernel BRK exception at EL1
> > [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
> > [ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x
> > [ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G        W         5.9.0-rc1+ #47
> > [ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun  6 2020
> > [ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
> > [ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
> > [ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
> > [ 6525.820832] sp : ffff8000130cbb80
> > [ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000
> > [ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000
> > [ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038
> > [ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080
> > [ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000
> > [ 6525.850682] x19: 0000000000000000 x18: 0000000000000000
> > [ 6525.855990] x17: 0000000000000000 x16: 0000000000000000
> > [ 6525.861298] x15: 0000000000000000 x14: 0000000000000000
> > [ 6525.866606] x13: 0000000000000000 x12: 0000000000000000
> > [ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c
> > [ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38
> > [ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881
> > [ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f
> > [ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374
> > [ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009
> > [ 6525.903760] Call trace:
> > [ 6525.906202]  bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
> > [ 6525.911076]  bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
> > [ 6525.915957]  bpf_dispatcher_xdp_func+0x14/0x20
> > [ 6525.920398]  bpf_test_run+0x70/0x1b0
> > [ 6525.923969]  bpf_prog_test_run_xdp+0xec/0x190
> > [ 6525.928326]  __do_sys_bpf+0xc88/0x1b28
> > [ 6525.932072]  __arm64_sys_bpf+0x24/0x30
> > [ 6525.935820]  el0_svc_common.constprop.0+0x70/0x168
> > [ 6525.940607]  do_el0_svc+0x28/0x88
> > [ 6525.943920]  el0_sync_handler+0x88/0x190
> > [ 6525.947838]  el0_sync+0x140/0x180
> > [ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
> > [ 6525.957249] ---[ end trace cecc3f93b14927e2 ]---
> > 
> > The reason seems to be the offset[] creation and usage ctx->offset[]
> 
> "seems to be"? Are you unsure?

Reading the history and other ports of the JIT implementation, I couldn't 
tell if the decision on skipping the 1st entry was deliberate or not on 
Aarch64. Reading through the mailing list didn't help either [1].
Skipping the 1st entry seems indeed to cause the problem.
I did run the patch through the BPF tests; it showed no regressions and fixes
the error.

> 
> > while building the eBPF body.  The code currently omits the first 
> > instruction, since build_insn() will increase our ctx->idx before saving 
> > it.  When "taken loop with back jump to 1st insn" test runs it will
> > eventually call bpf2a64_offset(-1, 2, ctx). Since negative indexing is
> > permitted, the current outcome depends on the value stored in
> > ctx->offset[-1], which has nothing to do with our array.
> > If the value happens to be 0 the tests will work. If not this error
> > triggers.
> > 
> > So let's fix it by creating the ctx->offset[] correctly in the first
> > place and account for the extra instruction while calculating the arm
> > instruction offsets.
> 
> No Fixes: tag?

I'll re-spin and apply one 

> 
> > Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
> 
> Non-author signoffs here. What's going on?

My bad here, I'll add a Co-developed-by on v2 for the rest of the people and 
move my Signed-off last

[1] https://lore.kernel.org/bpf/CANoWswkaj1HysW3BxBMG9_nd48fm0MxM5egdtmHU6YsEc_GUtQ@mail.gmail.com/T/#u

Thanks
/Ilias
> 
> Will
Ilias Apalodimas Sept. 14, 2020, 1:23 p.m. UTC | #3
Hi Will,

On Mon, Sep 14, 2020 at 03:35:04PM +0300, Ilias Apalodimas wrote:
> On Mon, Sep 14, 2020 at 01:20:43PM +0100, Will Deacon wrote:
> > On Mon, Sep 14, 2020 at 11:36:21AM +0300, Ilias Apalodimas wrote:
> > > Running the eBPF test_verifier leads to random errors looking like this:
> > > 
> > > [ 6525.735488] Unexpected kernel BRK exception at EL1
> > > [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
> > > [ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x
> > > [ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G        W         5.9.0-rc1+ #47
> > > [ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun  6 2020
> > > [ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
> > > [ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
> > > [ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
> > > [ 6525.820832] sp : ffff8000130cbb80
> > > [ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000
> > > [ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000
> > > [ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038
> > > [ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080
> > > [ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000
> > > [ 6525.850682] x19: 0000000000000000 x18: 0000000000000000
> > > [ 6525.855990] x17: 0000000000000000 x16: 0000000000000000
> > > [ 6525.861298] x15: 0000000000000000 x14: 0000000000000000
> > > [ 6525.866606] x13: 0000000000000000 x12: 0000000000000000
> > > [ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c
> > > [ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38
> > > [ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881
> > > [ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f
> > > [ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374
> > > [ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009
> > > [ 6525.903760] Call trace:
> > > [ 6525.906202]  bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
> > > [ 6525.911076]  bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
> > > [ 6525.915957]  bpf_dispatcher_xdp_func+0x14/0x20
> > > [ 6525.920398]  bpf_test_run+0x70/0x1b0
> > > [ 6525.923969]  bpf_prog_test_run_xdp+0xec/0x190
> > > [ 6525.928326]  __do_sys_bpf+0xc88/0x1b28
> > > [ 6525.932072]  __arm64_sys_bpf+0x24/0x30
> > > [ 6525.935820]  el0_svc_common.constprop.0+0x70/0x168
> > > [ 6525.940607]  do_el0_svc+0x28/0x88
> > > [ 6525.943920]  el0_sync_handler+0x88/0x190
> > > [ 6525.947838]  el0_sync+0x140/0x180
> > > [ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
> > > [ 6525.957249] ---[ end trace cecc3f93b14927e2 ]---
> > > 
> > > The reason seems to be the offset[] creation and usage ctx->offset[]
> > 
> > "seems to be"? Are you unsure?
> 
> Reading the history and other ports of the JIT implementation, I couldn't 
> tell if the decision on skipping the 1st entry was deliberate or not on 
> Aarch64. Reading through the mailist list didn't help either [1].
> Skipping the 1st entry seems indeed to cause the problem.
> I did run the patch though the BPF tests and showed no regressions + fixing 
> the error.

I'll correct myself here.
Looking into 7c2e988f400e ("bpf: fix x64 JIT code generation for jmp to 1st insn")
explains things a bit better.
Jumping back to the 1st insn wasn't allowed until eBPF bounded loops were 
introduced. That's why the 1st instruction was not saved in the original code.

> > 
> > No Fixes: tag?
> 
> I'll re-spin and apply one 
> 
Any suggestion on any Fixes I should apply? The original code was 'correct' and
broke only when bounded loops and their self-tests were introduced.

Thanks
/Ilias
Will Deacon Sept. 14, 2020, 2:01 p.m. UTC | #4
Hi Ilias,

On Mon, Sep 14, 2020 at 04:23:50PM +0300, Ilias Apalodimas wrote:
> On Mon, Sep 14, 2020 at 03:35:04PM +0300, Ilias Apalodimas wrote:
> > On Mon, Sep 14, 2020 at 01:20:43PM +0100, Will Deacon wrote:
> > > On Mon, Sep 14, 2020 at 11:36:21AM +0300, Ilias Apalodimas wrote:
> > > > Running the eBPF test_verifier leads to random errors looking like this:

[...]

> > > > The reason seems to be the offset[] creation and usage ctx->offset[]
> > > 
> > > "seems to be"? Are you unsure?
> > 
> > Reading the history and other ports of the JIT implementation, I couldn't 
> > tell if the decision on skipping the 1st entry was deliberate or not on 
> > Aarch64. Reading through the mailist list didn't help either [1].
> > Skipping the 1st entry seems indeed to cause the problem.
> > I did run the patch though the BPF tests and showed no regressions + fixing 
> > the error.
> 
> I'll correct myself here.
> Looking into 7c2e988f400e ("bpf: fix x64 JIT code generation for jmp to 1st insn")
> explains things a bit better.
> Jumping back to the 1st insn wasn't allowed until eBPF bounded loops were 
> introduced. That's what the 1st instruction was not saved in the original code.
> 
> > > 
> > > No Fixes: tag?
> > 
> > I'll re-spin and apply one 
> > 
> Any suggestion on any Fixes I should apply? The original code was 'correct' and
> broke only when bounded loops and their self-tests were introduced.

Ouch, that's pretty bad as it means nobody is regression testing BPF on
arm64 with mainline. Damn.

The Fixes: tag should identify the commit beyond which we don't need to
backport the fix, so it sounds like introduction of bounded loops, according
to your analysis.

Will
Ilias Apalodimas Sept. 14, 2020, 4:02 p.m. UTC | #5
Hi Will,

On Mon, Sep 14, 2020 at 03:01:15PM +0100, Will Deacon wrote:
> Hi Ilias,
> 

[...]

> > > > 
> > > > No Fixes: tag?
> > > 
> > > I'll re-spin and apply one 
> > > 
> > Any suggestion on any Fixes I should apply? The original code was 'correct' and
> > broke only when bounded loops and their self-tests were introduced.
> 
> Ouch, that's pretty bad as it means nobody is regression testing BPF on
> arm64 with mainline. Damn.

That might not be entirely true. Since offset is a pointer, there's a chance
(and a pretty high one according to my reproducer) that the offset[-1] value 
happens to be 0. In that case the tests will pass fine. I can reproduce the bug
approximately once every 6-7 passes here.

I'll send a v2 shortly fixing the tags and adding a few comments on the code,
which will hopefully make future reading easier.

Cheers
/Ilias
Jesper Dangaard Brouer Sept. 14, 2020, 4:12 p.m. UTC | #6
On Mon, 14 Sep 2020 15:01:15 +0100 Will Deacon <will@kernel.org> wrote:

> Hi Ilias,
> 
> On Mon, Sep 14, 2020 at 04:23:50PM +0300, Ilias Apalodimas wrote:
> > On Mon, Sep 14, 2020 at 03:35:04PM +0300, Ilias Apalodimas wrote:  
> > > On Mon, Sep 14, 2020 at 01:20:43PM +0100, Will Deacon wrote:  
> > > > On Mon, Sep 14, 2020 at 11:36:21AM +0300, Ilias Apalodimas wrote:  
> > > > > Running the eBPF test_verifier leads to random errors looking like this:  
> 
> [...]
> > >   
> > Any suggestion on any Fixes I should apply? The original code was 'correct' and
> > broke only when bounded loops and their self-tests were introduced.  
> 
> Ouch, that's pretty bad as it means nobody is regression testing BPF on
> arm64 with mainline. Damn.

Yes, it unfortunately seems that upstream is lacking BPF regression
testing for ARM64 :-(

This bug surfaced when Red Hat QA tested our kernel backports, on
different archs.
Ilias Apalodimas Sept. 14, 2020, 5:02 p.m. UTC | #7
On Mon, Sep 14, 2020 at 06:12:34PM +0200, Jesper Dangaard Brouer wrote:
> 
> On Mon, 14 Sep 2020 15:01:15 +0100 Will Deacon <will@kernel.org> wrote:
> 
> > Hi Ilias,
> > 
> > On Mon, Sep 14, 2020 at 04:23:50PM +0300, Ilias Apalodimas wrote:
> > > On Mon, Sep 14, 2020 at 03:35:04PM +0300, Ilias Apalodimas wrote:  
> > > > On Mon, Sep 14, 2020 at 01:20:43PM +0100, Will Deacon wrote:  
> > > > > On Mon, Sep 14, 2020 at 11:36:21AM +0300, Ilias Apalodimas wrote:  
> > > > > > Running the eBPF test_verifier leads to random errors looking like this:  
> > 
> > [...]
> > > >   
> > > Any suggestion on any Fixes I should apply? The original code was 'correct' and
> > > broke only when bounded loops and their self-tests were introduced.  
> > 
> > Ouch, that's pretty bad as it means nobody is regression testing BPF on
> > arm64 with mainline. Damn.
> 
> Yes, it unfortunately seems that upstream is lacking BPF regression
> testing for ARM64 :-(
> 
> This bug surfaced when Red Hat QA tested our kernel backports, on
> different archs.

Naresh from Linaro reported it during his tests on 5.8-rc1 as well [1].
I've included both Jiri and him on the v2 as reporters.

[1] https://lkml.org/lkml/2020/8/11/58
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
>
Xi Wang Sept. 14, 2020, 5:47 p.m. UTC | #8
On Mon, Sep 14, 2020 at 10:03 AM Ilias Apalodimas
<ilias.apalodimas@linaro.org> wrote:
> Naresh from Linaro reported it during his tests on 5.8-rc1 as well [1].
> I've included both Jiri and him on the v2 as reporters.
>
> [1] https://lkml.org/lkml/2020/8/11/58

I'm curious what you think of Luke's earlier patch to this bug:

https://lore.kernel.org/bpf/CANoWswkaj1HysW3BxBMG9_nd48fm0MxM5egdtmHU6YsEc_GUtQ@mail.gmail.com/T/#m4335b4005da0d60059ba96920fcaaecf2637042a
Ilias Apalodimas Sept. 14, 2020, 5:55 p.m. UTC | #9
On Mon, Sep 14, 2020 at 10:47:33AM -0700, Xi Wang wrote:
> On Mon, Sep 14, 2020 at 10:03 AM Ilias Apalodimas
> <ilias.apalodimas@linaro.org> wrote:
> > Naresh from Linaro reported it during his tests on 5.8-rc1 as well [1].
> > I've included both Jiri and him on the v2 as reporters.
> >
> > [1] https://lkml.org/lkml/2020/8/11/58
> 
> I'm curious what you think of Luke's earlier patch to this bug:

We've briefly discussed this approach with Yauheni while coming up with the 
posted patch.
I think that constructing the array correctly in the first place is better. 
Right now it might only be used in bpf2a64_offset() and bpf_prog_fill_jited_linfo()
but if we fixup the values on the fly in there, everyone that intends to use the
offset for any reason will have to account for the missing instruction.

Cheers
/Ilias
> 
> https://lore.kernel.org/bpf/CANoWswkaj1HysW3BxBMG9_nd48fm0MxM5egdtmHU6YsEc_GUtQ@mail.gmail.com/T/#m4335b4005da0d60059ba96920fcaaecf2637042a
Xi Wang Sept. 14, 2020, 6:08 p.m. UTC | #10
On Mon, Sep 14, 2020 at 10:55 AM Ilias Apalodimas
<ilias.apalodimas@linaro.org> wrote:
> We've briefly discussed this approach with Yauheni while coming up with the
> posted patch.
> I think that contructing the array correctly in the first place is better.
> Right now it might only be used in bpf2a64_offset() and bpf_prog_fill_jited_linfo()
> but if we fixup the values on the fly in there, everyone that intends to use the
> offset for any reason will have to account for the missing instruction.

I don't understand what you mean by "correctly."  What's your correctness spec?

I don't think there's some consistent semantics of "offsets" across
the JITs of different architectures (maybe it's good to clean that
up).  RV64 and RV32 JITs are doing something similar to arm64 with
respect to offsets.  CCing Björn and Luke.
Luke Nelson Sept. 14, 2020, 6:21 p.m. UTC | #11
On Mon, Sep 14, 2020 at 11:08 AM Xi Wang <xi.wang@gmail.com> wrote:
> I don't think there's some consistent semantics of "offsets" across
> the JITs of different architectures (maybe it's good to clean that
> up).  RV64 and RV32 JITs are doing something similar to arm64 with
> respect to offsets.  CCing Björn and Luke.

As I understand it, there are two strategies JITs use to keep track of
the ctx->offset table.

Some JITs (RV32, RV64, arm32, arm64 currently, x86-32) track the end
of each instruction (e.g., ctx->offset[i] marks the beginning of
instruction i + 1).
This requires care to handle jumps to the first instruction to avoid
using ctx->offset[-1]. The RV32 and RV64 JITs have special handling
for this case,
while the arm32, arm64, and x86-32 JITs appear not to. The arm32 and
x32 probably need to be fixed for the same reason arm64 does.

The other strategy is for ctx->offset[i] to track the beginning of
instruction i. The x86-64 JIT currently works this way.
This can be easier to use (no need to special case -1) but looks to be
trickier to construct. This patch changes the arm64 JIT to work this
way.

I don't think either strategy is inherently better, both can be
"correct" as long as the JIT uses ctx->offset in the right way.
This might be a good opportunity to change the JITs to be consistent
about this (especially if the arm32, arm64, and x32 JITs all need to
be fixed anyways).
Having all JITs agree on the meaning of ctx->offset could help future
readers debug / understand the code, and could help to someday verify
the
ctx->offset construction.

Any thoughts?

- Luke
Ilias Apalodimas Sept. 14, 2020, 6:27 p.m. UTC | #12
Hi Xi, 

On Mon, Sep 14, 2020 at 11:08:13AM -0700, Xi Wang wrote:
> On Mon, Sep 14, 2020 at 10:55 AM Ilias Apalodimas
> <ilias.apalodimas@linaro.org> wrote:
> > We've briefly discussed this approach with Yauheni while coming up with the
> > posted patch.
> > I think that contructing the array correctly in the first place is better.
> > Right now it might only be used in bpf2a64_offset() and bpf_prog_fill_jited_linfo()
> > but if we fixup the values on the fly in there, everyone that intends to use the
> > offset for any reason will have to account for the missing instruction.
> 
> I don't understand what you mean by "correctly."  What's your correctness spec?

> 
> I don't think there's some consistent semantics of "offsets" across
> the JITs of different architectures (maybe it's good to clean that
> up).  RV64 and RV32 JITs are doing something similar to arm64 with
> respect to offsets.  CCing Björn and Luke.

Even if that's true, is there any reason at all why we should skip the first element 
of the array, that's now needed since 7c2e988f400 to jump back to the first
instruction?
Does introducing 2 extra if conditions and hotfixing the array on the fly (and for 
every future invocation of that) seem better to you?

Cheers
/Ilias
Ilias Apalodimas Sept. 14, 2020, 6:36 p.m. UTC | #13
Hi Luke, 

On Mon, Sep 14, 2020 at 11:21:58AM -0700, Luke Nelson wrote:
> On Mon, Sep 14, 2020 at 11:08 AM Xi Wang <xi.wang@gmail.com> wrote:
> > I don't think there's some consistent semantics of "offsets" across
> > the JITs of different architectures (maybe it's good to clean that
> > up).  RV64 and RV32 JITs are doing something similar to arm64 with
> > respect to offsets.  CCing Björn and Luke.
> 
> As I understand it, there are two strategies JITs use to keep track of
> the ctx->offset table.
> 
> Some JITs (RV32, RV64, arm32, arm64 currently, x86-32) track the end
> of each instruction (e.g., ctx->offset[i] marks the beginning of
> instruction i + 1).
> This requires care to handle jumps to the first instruction to avoid
> using ctx->offset[-1]. The RV32 and RV64 JITs have special handling
> for this case,
> while the arm32, arm64, and x86-32 JITs appear not to. The arm32 and
> x32 probably need to be fixed for the same reason arm64 does.
> 
> The other strategy is for ctx->offset[i] to track the beginning of
> instruction i. The x86-64 JIT currently works this way.
> This can be easier to use (no need to special case -1) but looks to be
> trickier to construct. This patch changes the arm64 JIT to work this
> way.
> 
> I don't think either strategy is inherently better, both can be
> "correct" as long as the JIT uses ctx->offset in the right way.
> This might be a good opportunity to change the JITs to be consistent
> about this (especially if the arm32, arm64, and x32 JITs all need to
> be fixed anyways).
> Having all JITs agree on the meaning of ctx->offset could help future
> readers debug / understand the code, and could help to someday verify
> the
> ctx->offset construction.
> 
> Any thoughts?

The common strategy does make a lot of sense and yes, both patches will work 
assuming the ctx->offset ends up being what the JIT engine expects it to be. 
As I mentioned earlier we did consider both, but ended up using the latter, 
since, as you said, it removes the need for handling the special (-1) case.

Cheers
/Ilias

> 
> - Luke
Xi Wang Sept. 14, 2020, 6:52 p.m. UTC | #14
On Mon, Sep 14, 2020 at 11:28 AM Ilias Apalodimas
<ilias.apalodimas@linaro.org> wrote:
> Even if that's true, is any reason at all why we should skip the first element
> of the array, that's now needed since 7c2e988f400 to jump back to the first
> instruction?
> Introducing 2 extra if conditions and hotfix the array on the fly (and for
> every future invocation of that), seems better to you?

My point was that there's no inherently correct/wrong way to construct
offsets.  As Luke explained in his email, 1) there are two different
strategies used by the JITs and 2) there are likely similar bugs
beyond arm64.

Each strategy has pros and cons, and I'm fine with either.  I like the
strategy used in your patch because it's more intuitive (offset[i] is
the start of the emitted instructions for BPF instruction i, rather
than the end), though the changes to the construction process are
trickier.

If we decide to patch the arm64 JIT the way you proposed, we should
consider whether to change other JITs consistently.
Ilias Apalodimas Sept. 14, 2020, 7:24 p.m. UTC | #15
On Mon, Sep 14, 2020 at 11:52:16AM -0700, Xi Wang wrote:
> On Mon, Sep 14, 2020 at 11:28 AM Ilias Apalodimas
> <ilias.apalodimas@linaro.org> wrote:
> > Even if that's true, is any reason at all why we should skip the first element
> > of the array, that's now needed since 7c2e988f400 to jump back to the first
> > instruction?
> > Introducing 2 extra if conditions and hotfix the array on the fly (and for
> > every future invocation of that), seems better to you?
> 
> My point was that there's no inherently correct/wrong way to construct
> offsets.  As Luke explained in his email, 1) there are two different
> strategies used by the JITs and 2) there are likely similar bugs
> beyond arm64.
> 
> Each strategy has pros and cons, and I'm fine with either.  I like the
> strategy used in your patch because it's more intuitive (offset[i] is
> the start of the emitted instructions for BPF instruction i, rather
> than the end), though the changes to the construction process are
> trickier.
> 

Well the arm64 was literally a 'save the idx before building the instruction',
and add another element on the array.  So it's not that much trickier, especially
if we document it properly.

I haven't checked the rest of the architectures tbh (apart from x86). 
I assumed the tracking used in arm64 at that point, was a result of how 
eBPF worked before bounded loops were introduced. Maybe I was wrong.
It felt a bit more natural to track the beginning of the emitted 
instructions rather than the end.

> If we decide to patch the arm64 JIT the way you proposed, we should
> consider whether to change other JITs consistently.

I think this is a good idea. Following the code is not exactly a stroll in the
park, so we can at least make it consistent across architectures.

Thanks
/Ilias
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f8912e45be7a..5891733a9f39 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -143,9 +143,13 @@  static inline void emit_addr_mov_i64(const int reg, const u64 val,
 	}
 }
 
-static inline int bpf2a64_offset(int bpf_to, int bpf_from,
+static inline int bpf2a64_offset(int bpf_insn, int off,
 				 const struct jit_ctx *ctx)
 {
+	/* arm64 offset is relative to the branch instruction */
+	int bpf_from = bpf_insn + 1;
+	/* BPF JMP offset is relative to the next instruction */
+	int bpf_to = bpf_insn + off + 1;
 	int to = ctx->offset[bpf_to];
 	/* -1 to account for the Branch instruction */
 	int from = ctx->offset[bpf_from] - 1;
@@ -642,7 +646,7 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 
 	/* JUMP off */
 	case BPF_JMP | BPF_JA:
-		jmp_offset = bpf2a64_offset(i + off, i, ctx);
+		jmp_offset = bpf2a64_offset(i, off, ctx);
 		check_imm26(jmp_offset);
 		emit(A64_B(jmp_offset), ctx);
 		break;
@@ -669,7 +673,7 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_JMP32 | BPF_JSLE | BPF_X:
 		emit(A64_CMP(is64, dst, src), ctx);
 emit_cond_jmp:
-		jmp_offset = bpf2a64_offset(i + off, i, ctx);
+		jmp_offset = bpf2a64_offset(i, off, ctx);
 		check_imm19(jmp_offset);
 		switch (BPF_OP(code)) {
 		case BPF_JEQ:
@@ -912,18 +916,21 @@  static int build_body(struct jit_ctx *ctx, bool extra_pass)
 		const struct bpf_insn *insn = &prog->insnsi[i];
 		int ret;
 
+		if (ctx->image == NULL)
+			ctx->offset[i] = ctx->idx;
+
 		ret = build_insn(insn, ctx, extra_pass);
 		if (ret > 0) {
 			i++;
 			if (ctx->image == NULL)
-				ctx->offset[i] = ctx->idx;
+				ctx->offset[i] = ctx->offset[i - 1];
 			continue;
 		}
-		if (ctx->image == NULL)
-			ctx->offset[i] = ctx->idx;
 		if (ret)
 			return ret;
 	}
+	if (ctx->image == NULL)
+		ctx->offset[i] = ctx->idx;
 
 	return 0;
 }
@@ -1002,7 +1009,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.prog = prog;
 
-	ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+	ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
 	if (ctx.offset == NULL) {
 		prog = orig_prog;
 		goto out_off;
@@ -1089,7 +1096,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	prog->jited_len = prog_size;
 
 	if (!prog->is_func || extra_pass) {
-		bpf_prog_fill_jited_linfo(prog, ctx.offset);
+		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 out_off:
 		kfree(ctx.offset);
 		kfree(jit_data);