@@ -1096,8 +1096,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx);
emit(ARM_ADD_I(tmp[1], r_array, off), ctx);
r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx);
- emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx);
- emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
+ emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx);
emit(ARM_CMP_I(tmp[1], 0), ctx);
_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
@@ -188,6 +188,10 @@
#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \
| (rt) << 12 | (rn) << 16 \
| (rm))
+#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \
+ (ARM_INST_LDR_R | ARM_INST_LDST__U \
+ | (rt) << 12 | (rn) << 16 /* same rt/rn placement as ARM_LDR_R */ \
+ | (imm) << 7 | (type) << 5 | (rm)) /* rm is shifted by imm (shift kind: type) in the load itself; imm/type are not masked */
#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \
| (rt) << 12 | (rn) << 16 \
| (rm))
Rather than pre-shifting the rm register for the ldr in the tail call,
shift it in the load instruction.  This eliminates one unnecessary
instruction.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/net/bpf_jit_32.c | 3 +--
 arch/arm/net/bpf_jit_32.h | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)