diff mbox series

[bpf-next,v5,6/6] bpf, x86: align dispatcher branch targets to 16B

Message ID 20191213175112.30208-7-bjorn.topel@gmail.com
State Accepted
Delegated to: BPF Maintainers
Headers show
Series Introduce the BPF dispatcher | expand

Commit Message

Björn Töpel Dec. 13, 2019, 5:51 p.m. UTC
From: Björn Töpel <bjorn.topel@intel.com>

From Intel 64 and IA-32 Architectures Optimization Reference Manual,
3.4.1.4 Code Alignment, Assembly/Compiler Coding Rule 11: All branch
targets should be 16-byte aligned.

This commits aligns branch targets according to the Intel manual.

The nops used to align branch targets make the dispatcher larger, and
therefore the number of supported dispatch points/programs are
descreased from 64 to 48.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 arch/x86/net/bpf_jit_comp.c | 30 +++++++++++++++++++++++++++++-
 include/linux/bpf.h         |  2 +-
 2 files changed, 30 insertions(+), 2 deletions(-)

Comments

David Laight Dec. 16, 2019, 4:45 p.m. UTC | #1
From: Björn Töpel
> Sent: 13 December 2019 17:51
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> From Intel 64 and IA-32 Architectures Optimization Reference Manual,
> 3.4.1.4 Code Alignment, Assembly/Compiler Coding Rule 11: All branch
> targets should be 16-byte aligned.
> 
> This commits aligns branch targets according to the Intel manual.

I'd Ignore that advice....
It makes very little difference, and none at all on more recent cpu.
Read https://www.agner.org/optimize/microarchitecture.pdf
The extra cache footprint probably makes a bigger difference.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 3ce7ad41bd6f..4c8a2d1f8470 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1548,6 +1548,26 @@  static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 	return 0;
 }
 
+static void emit_nops(u8 **pprog, unsigned int len)
+{
+	unsigned int i, noplen;
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	while (len > 0) {
+		noplen = len;
+
+		if (noplen > ASM_NOP_MAX)
+			noplen = ASM_NOP_MAX;
+
+		for (i = 0; i < noplen; i++)
+			EMIT1(ideal_nops[noplen][i]);
+		len -= noplen;
+	}
+
+	*pprog = prog;
+}
+
 static int emit_fallback_jump(u8 **pprog)
 {
 	u8 *prog = *pprog;
@@ -1570,8 +1590,8 @@  static int emit_fallback_jump(u8 **pprog)
 
 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 {
+	u8 *jg_reloc, *jg_target, *prog = *pprog;
 	int pivot, err, jg_bytes = 1, cnt = 0;
-	u8 *jg_reloc, *prog = *pprog;
 	s64 jg_offset;
 
 	if (a == b) {
@@ -1620,6 +1640,14 @@  static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 	if (err)
 		return err;
 
+	/* From Intel 64 and IA-32 Architectures Optimization
+	 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
+	 * Coding Rule 11: All branch targets should be 16-byte
+	 * aligned.
+	 */
+	jg_target = PTR_ALIGN(prog, 16);
+	if (jg_target != prog)
+		emit_nops(&prog, jg_target - prog);
 	jg_offset = prog - jg_reloc;
 	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5970989b99d1..d467983e61bb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -471,7 +471,7 @@  struct bpf_trampoline {
 	u64 selector;
 };
 
-#define BPF_DISPATCHER_MAX 64 /* Fits in 2048B */
+#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
 
 struct bpf_dispatcher_prog {
 	struct bpf_prog *prog;