[v3,07/29] x86: bpf_jit, use ENTRY+ENDPROC

Submitted by Jiri Slaby on April 21, 2017, 2:12 p.m.

Details

Message ID 20170421141305.25180-7-jslaby@suse.cz
State Rejected
Delegated to: David Miller
Headers show

Commit Message

Jiri Slaby April 21, 2017, 2:12 p.m.
Do not use a custom macro FUNC for starts of the global functions, use
ENTRY instead.

And while at it, annotate also ends of the functions by ENDPROC.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: netdev@vger.kernel.org
---
 arch/x86/net/bpf_jit.S | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

Comments

Alexei Starovoitov April 21, 2017, 7:32 p.m.
On Fri, Apr 21, 2017 at 04:12:43PM +0200, Jiri Slaby wrote:
> Do not use a custom macro FUNC for starts of the global functions, use
> ENTRY instead.
> 
> And while at it, annotate also ends of the functions by ENDPROC.
> 
> Signed-off-by: Jiri Slaby <jslaby@suse.cz>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
> Cc: James Morris <jmorris@namei.org>
> Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> Cc: Patrick McHardy <kaber@trash.net>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: x86@kernel.org
> Cc: netdev@vger.kernel.org
> ---
>  arch/x86/net/bpf_jit.S | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
> index f2a7faf4706e..762c29fb8832 100644
> --- a/arch/x86/net/bpf_jit.S
> +++ b/arch/x86/net/bpf_jit.S
> @@ -23,16 +23,12 @@
>  	32 /* space for rbx,r13,r14,r15 */ + \
>  	8 /* space for skb_copy_bits */)
>  
> -#define FUNC(name) \
> -	.globl name; \
> -	.type name, @function; \
> -	name:
> -
> -FUNC(sk_load_word)
> +ENTRY(sk_load_word)
>  	test	%esi,%esi
>  	js	bpf_slow_path_word_neg
> +ENDPROC(sk_load_word)

this doens't look right.
It will add alignment nops in critical paths of these pseudo functions.
I'm also not sure whether it will still work afterwards.
Was it tested?
I'd prefer if this code kept as-is.

> -FUNC(sk_load_word_positive_offset)
> +ENTRY(sk_load_word_positive_offset)
>  	mov	%r9d,%eax		# hlen
>  	sub	%esi,%eax		# hlen - offset
>  	cmp	$3,%eax
> @@ -40,12 +36,14 @@ FUNC(sk_load_word_positive_offset)
>  	mov     (SKBDATA,%rsi),%eax
>  	bswap   %eax  			/* ntohl() */
>  	ret
> +ENDPROC(sk_load_word_positive_offset)
>  
> -FUNC(sk_load_half)
> +ENTRY(sk_load_half)
>  	test	%esi,%esi
>  	js	bpf_slow_path_half_neg
> +ENDPROC(sk_load_half)
>  
> -FUNC(sk_load_half_positive_offset)
> +ENTRY(sk_load_half_positive_offset)
>  	mov	%r9d,%eax
>  	sub	%esi,%eax		#	hlen - offset
>  	cmp	$1,%eax
> @@ -53,16 +51,19 @@ FUNC(sk_load_half_positive_offset)
>  	movzwl	(SKBDATA,%rsi),%eax
>  	rol	$8,%ax			# ntohs()
>  	ret
> +ENDPROC(sk_load_half_positive_offset)
>  
> -FUNC(sk_load_byte)
> +ENTRY(sk_load_byte)
>  	test	%esi,%esi
>  	js	bpf_slow_path_byte_neg
> +ENDPROC(sk_load_byte)
>  
> -FUNC(sk_load_byte_positive_offset)
> +ENTRY(sk_load_byte_positive_offset)
>  	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
>  	jle	bpf_slow_path_byte
>  	movzbl	(SKBDATA,%rsi),%eax
>  	ret
> +ENDPROC(sk_load_byte_positive_offset)
>  
>  /* rsi contains offset and can be scratched */
>  #define bpf_slow_path_common(LEN)		\
> @@ -119,31 +120,34 @@ bpf_slow_path_word_neg:
>  	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
>  	jl	bpf_error	/* offset lower -> error  */
>  
> -FUNC(sk_load_word_negative_offset)
> +ENTRY(sk_load_word_negative_offset)
>  	sk_negative_common(4)
>  	mov	(%rax), %eax
>  	bswap	%eax
>  	ret
> +ENDPROC(sk_load_word_negative_offset)
>  
>  bpf_slow_path_half_neg:
>  	cmp	SKF_MAX_NEG_OFF, %esi
>  	jl	bpf_error
>  
> -FUNC(sk_load_half_negative_offset)
> +ENTRY(sk_load_half_negative_offset)
>  	sk_negative_common(2)
>  	mov	(%rax),%ax
>  	rol	$8,%ax
>  	movzwl	%ax,%eax
>  	ret
> +ENDPROC(sk_load_half_negative_offset)
>  
>  bpf_slow_path_byte_neg:
>  	cmp	SKF_MAX_NEG_OFF, %esi
>  	jl	bpf_error
>  
> -FUNC(sk_load_byte_negative_offset)
> +ENTRY(sk_load_byte_negative_offset)
>  	sk_negative_common(1)
>  	movzbl	(%rax), %eax
>  	ret
> +ENDPROC(sk_load_byte_negative_offset)
>  
>  bpf_error:
>  # force a return 0 from jit handler
> -- 
> 2.12.2
>
Jiri Slaby April 24, 2017, 6:45 a.m.
On 04/21/2017, 09:32 PM, Alexei Starovoitov wrote:
> On Fri, Apr 21, 2017 at 04:12:43PM +0200, Jiri Slaby wrote:
>> Do not use a custom macro FUNC for starts of the global functions, use
>> ENTRY instead.
>>
>> And while at it, annotate also ends of the functions by ENDPROC.
>>
>> Signed-off-by: Jiri Slaby <jslaby@suse.cz>
>> Cc: "David S. Miller" <davem@davemloft.net>
>> Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
>> Cc: James Morris <jmorris@namei.org>
>> Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
>> Cc: Patrick McHardy <kaber@trash.net>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: "H. Peter Anvin" <hpa@zytor.com>
>> Cc: x86@kernel.org
>> Cc: netdev@vger.kernel.org
>> ---
>>  arch/x86/net/bpf_jit.S | 32 ++++++++++++++++++--------------
>>  1 file changed, 18 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
>> index f2a7faf4706e..762c29fb8832 100644
>> --- a/arch/x86/net/bpf_jit.S
>> +++ b/arch/x86/net/bpf_jit.S
>> @@ -23,16 +23,12 @@
>>  	32 /* space for rbx,r13,r14,r15 */ + \
>>  	8 /* space for skb_copy_bits */)
>>  
>> -#define FUNC(name) \
>> -	.globl name; \
>> -	.type name, @function; \
>> -	name:
>> -
>> -FUNC(sk_load_word)
>> +ENTRY(sk_load_word)
>>  	test	%esi,%esi
>>  	js	bpf_slow_path_word_neg
>> +ENDPROC(sk_load_word)
> 
> this doens't look right.
> It will add alignment nops in critical paths of these pseudo functions.
> I'm also not sure whether it will still work afterwards.
> Was it tested?
> I'd prefer if this code kept as-is.

It cannot stay as-is simply because we want to know where the functions
end to inject debuginfo properly. The code above does not warrant for
any exception.

Executing a nop takes a little and having externally-callable functions
aligned can actually help performance (no, I haven't measured nor tested
the code). But sure, the tool is generic, so I can introduce a local
macros to avoid alignments in the functions:

#define BPF_FUNC_START_LOCAL(name) \
		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
#define BPF_FUNC_START(name) \
		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)

#define BPF_FUNC_END(name) SYM_FUNC_END(name)

thanks,
David Miller April 24, 2017, 2:41 p.m.
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 24 Apr 2017 08:45:11 +0200

> On 04/21/2017, 09:32 PM, Alexei Starovoitov wrote:
>> On Fri, Apr 21, 2017 at 04:12:43PM +0200, Jiri Slaby wrote:
>>> Do not use a custom macro FUNC for starts of the global functions, use
>>> ENTRY instead.
>>>
>>> And while at it, annotate also ends of the functions by ENDPROC.
>>>
>>> Signed-off-by: Jiri Slaby <jslaby@suse.cz>
>>> Cc: "David S. Miller" <davem@davemloft.net>
>>> Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
>>> Cc: James Morris <jmorris@namei.org>
>>> Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
>>> Cc: Patrick McHardy <kaber@trash.net>
>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>> Cc: Ingo Molnar <mingo@redhat.com>
>>> Cc: "H. Peter Anvin" <hpa@zytor.com>
>>> Cc: x86@kernel.org
>>> Cc: netdev@vger.kernel.org
>>> ---
>>>  arch/x86/net/bpf_jit.S | 32 ++++++++++++++++++--------------
>>>  1 file changed, 18 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
>>> index f2a7faf4706e..762c29fb8832 100644
>>> --- a/arch/x86/net/bpf_jit.S
>>> +++ b/arch/x86/net/bpf_jit.S
>>> @@ -23,16 +23,12 @@
>>>  	32 /* space for rbx,r13,r14,r15 */ + \
>>>  	8 /* space for skb_copy_bits */)
>>>  
>>> -#define FUNC(name) \
>>> -	.globl name; \
>>> -	.type name, @function; \
>>> -	name:
>>> -
>>> -FUNC(sk_load_word)
>>> +ENTRY(sk_load_word)
>>>  	test	%esi,%esi
>>>  	js	bpf_slow_path_word_neg
>>> +ENDPROC(sk_load_word)
>> 
>> this doens't look right.
>> It will add alignment nops in critical paths of these pseudo functions.
>> I'm also not sure whether it will still work afterwards.
>> Was it tested?
>> I'd prefer if this code kept as-is.
> 
> It cannot stay as-is simply because we want to know where the functions
> end to inject debuginfo properly. The code above does not warrant for
> any exception.

I totally and completely disagree.

> Executing a nop takes a little and having externally-callable functions
> aligned can actually help performance (no, I haven't measured nor tested
> the code). But sure, the tool is generic, so I can introduce a local
> macros to avoid alignments in the functions:

Not for this case, it's a bunch of entry points all packed together
intentionally so that SKB accesses of different access sizes (which is
almost always the case) from BPF programs use the smallest amount of
I-cache as possible.
Jiri Slaby April 24, 2017, 2:52 p.m.
On 04/24/2017, 04:41 PM, David Miller wrote:
>> It cannot stay as-is simply because we want to know where the functions
>> end to inject debuginfo properly. The code above does not warrant for
>> any exception.
> 
> I totally and completely disagree.

You can disagree as you wish but there is really nothing special on the
bpf code with respect to annotations.

>> Executing a nop takes a little and having externally-callable functions
>> aligned can actually help performance (no, I haven't measured nor tested
>> the code). But sure, the tool is generic, so I can introduce a local
>> macros to avoid alignments in the functions:
> 
> Not for this case, it's a bunch of entry points all packed together
> intentionally so that SKB accesses of different access sizes (which is
> almost always the case) from BPF programs use the smallest amount of
> I-cache as possible.

And for that reason I suggested the special macros for the code (see the
macros in the e-mail you replied to again). So what problem do you
actually have with the suggested solution?

thanks,
David Miller April 24, 2017, 3:08 p.m.
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 24 Apr 2017 16:52:43 +0200

> On 04/24/2017, 04:41 PM, David Miller wrote:
>>> It cannot stay as-is simply because we want to know where the functions
>>> end to inject debuginfo properly. The code above does not warrant for
>>> any exception.
>> 
>> I totally and completely disagree.
> 
> You can disagree as you wish but there is really nothing special on the
> bpf code with respect to annotations.
> 
>>> Executing a nop takes a little and having externally-callable functions
>>> aligned can actually help performance (no, I haven't measured nor tested
>>> the code). But sure, the tool is generic, so I can introduce a local
>>> macros to avoid alignments in the functions:
>> 
>> Not for this case, it's a bunch of entry points all packed together
>> intentionally so that SKB accesses of different access sizes (which is
>> almost always the case) from BPF programs use the smallest amount of
>> I-cache as possible.
> 
> And for that reason I suggested the special macros for the code (see the
> macros in the e-mail you replied to again). So what problem do you
> actually have with the suggested solution?

If you align the entry points, then the code sequence as a whole is
are no longer densely packed.

Or do I misunderstand how your macros work?
Jiri Slaby April 24, 2017, 3:41 p.m.
On 04/24/2017, 05:08 PM, David Miller wrote:
> If you align the entry points, then the code sequence as a whole is
> are no longer densely packed.

Sure.

> Or do I misunderstand how your macros work?

Perhaps. So the suggested macros for the code are:
#define BPF_FUNC_START_LOCAL(name) \
		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
#define BPF_FUNC_START(name) \
		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)

and they differ from the standard ones:
#define SYM_FUNC_START_LOCAL(name)                      \
        SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
#define SYM_FUNC_START(name)                            \
        SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)


The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
#define SYM_A_ALIGN                             ALIGN
#define SYM_A_NONE                              /* nothing */

Does it look OK now?

thanks,
David Miller April 24, 2017, 3:51 p.m.
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 24 Apr 2017 17:41:06 +0200

> On 04/24/2017, 05:08 PM, David Miller wrote:
>> If you align the entry points, then the code sequence as a whole is
>> are no longer densely packed.
> 
> Sure.
> 
>> Or do I misunderstand how your macros work?
> 
> Perhaps. So the suggested macros for the code are:
> #define BPF_FUNC_START_LOCAL(name) \
> 		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
> #define BPF_FUNC_START(name) \
> 		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)
> 
> and they differ from the standard ones:
> #define SYM_FUNC_START_LOCAL(name)                      \
>         SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
> #define SYM_FUNC_START(name)                            \
>         SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)
> 
> 
> The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
> #define SYM_A_ALIGN                             ALIGN
> #define SYM_A_NONE                              /* nothing */
> 
> Does it look OK now?

I said I'm not OK with the alignment, so personally I am not
with how these macros work and what they will do to the code
generated for BPF packet accesses.

But I'll defer to Alexei on this because I don't have the time
nor the energy to fight this.

Thanks.
Jiri Slaby April 24, 2017, 3:53 p.m.
On 04/24/2017, 05:51 PM, David Miller wrote:
> I said I'm not OK with the alignment

So in short, the suggested macros add no alignment.
Ingo Molnar April 24, 2017, 3:55 p.m.
* Jiri Slaby <jslaby@suse.cz> wrote:

> On 04/24/2017, 05:08 PM, David Miller wrote:
> > If you align the entry points, then the code sequence as a whole is
> > are no longer densely packed.
> 
> Sure.
> 
> > Or do I misunderstand how your macros work?
> 
> Perhaps. So the suggested macros for the code are:
> #define BPF_FUNC_START_LOCAL(name) \
> 		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
> #define BPF_FUNC_START(name) \
> 		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)
> 
> and they differ from the standard ones:
> #define SYM_FUNC_START_LOCAL(name)                      \
>         SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
> #define SYM_FUNC_START(name)                            \
>         SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)
> 
> 
> The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
> #define SYM_A_ALIGN                             ALIGN
> #define SYM_A_NONE                              /* nothing */
> 
> Does it look OK now?

No, the patch changes alignment which is undesirable, it needs to preserve the 
existing (non-)alignment of the symbols!

Thanks,

	Ingo
Jiri Slaby April 24, 2017, 4:02 p.m.
On 04/24/2017, 05:55 PM, Ingo Molnar wrote:
> * Jiri Slaby <jslaby@suse.cz> wrote:
> 
>> On 04/24/2017, 05:08 PM, David Miller wrote:
>>> If you align the entry points, then the code sequence as a whole is
>>> are no longer densely packed.
>>
>> Sure.
>>
>>> Or do I misunderstand how your macros work?
>>
>> Perhaps. So the suggested macros for the code are:
>> #define BPF_FUNC_START_LOCAL(name) \
>> 		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
>> #define BPF_FUNC_START(name) \
>> 		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)
>>
>> and they differ from the standard ones:
>> #define SYM_FUNC_START_LOCAL(name)                      \
>>         SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
>> #define SYM_FUNC_START(name)                            \
>>         SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)
>>
>>
>> The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
>> #define SYM_A_ALIGN                             ALIGN
>> #define SYM_A_NONE                              /* nothing */
>>
>> Does it look OK now?
> 
> No, the patch changes alignment which is undesirable, it needs to preserve the 
> existing (non-)alignment of the symbols!

OK, so I am not expressing myself explicitly enough, it seems.

So, correct, the patch v3 adds alignments. I suggested in the discussion
the macros above. They do not add alignments. If everybody is OK with
that, v4 of the patch won't add alignments. OK?

thanks,
Ingo Molnar April 24, 2017, 4:40 p.m.
* Jiri Slaby <jslaby@suse.cz> wrote:

> On 04/24/2017, 05:55 PM, Ingo Molnar wrote:
> > * Jiri Slaby <jslaby@suse.cz> wrote:
> > 
> >> On 04/24/2017, 05:08 PM, David Miller wrote:
> >>> If you align the entry points, then the code sequence as a whole is
> >>> are no longer densely packed.
> >>
> >> Sure.
> >>
> >>> Or do I misunderstand how your macros work?
> >>
> >> Perhaps. So the suggested macros for the code are:
> >> #define BPF_FUNC_START_LOCAL(name) \
> >> 		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
> >> #define BPF_FUNC_START(name) \
> >> 		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)
> >>
> >> and they differ from the standard ones:
> >> #define SYM_FUNC_START_LOCAL(name)                      \
> >>         SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
> >> #define SYM_FUNC_START(name)                            \
> >>         SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)
> >>
> >>
> >> The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
> >> #define SYM_A_ALIGN                             ALIGN
> >> #define SYM_A_NONE                              /* nothing */
> >>
> >> Does it look OK now?
> > 
> > No, the patch changes alignment which is undesirable, it needs to preserve the 
> > existing (non-)alignment of the symbols!
> 
> OK, so I am not expressing myself explicitly enough, it seems.
> 
> So, correct, the patch v3 adds alignments. I suggested in the discussion
> the macros above. They do not add alignments. If everybody is OK with
> that, v4 of the patch won't add alignments. OK?

Yes.

Thanks,

	Ingo
Alexei Starovoitov April 24, 2017, 4:47 p.m.
On Mon, Apr 24, 2017 at 06:02:51PM +0200, Jiri Slaby wrote:
> On 04/24/2017, 05:55 PM, Ingo Molnar wrote:
> > * Jiri Slaby <jslaby@suse.cz> wrote:
> > 
> >> On 04/24/2017, 05:08 PM, David Miller wrote:
> >>> If you align the entry points, then the code sequence as a whole is
> >>> are no longer densely packed.
> >>
> >> Sure.
> >>
> >>> Or do I misunderstand how your macros work?
> >>
> >> Perhaps. So the suggested macros for the code are:
> >> #define BPF_FUNC_START_LOCAL(name) \
> >> 		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
> >> #define BPF_FUNC_START(name) \
> >> 		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)
> >>
> >> and they differ from the standard ones:
> >> #define SYM_FUNC_START_LOCAL(name)                      \
> >>         SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
> >> #define SYM_FUNC_START(name)                            \
> >>         SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)
> >>
> >>
> >> The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
> >> #define SYM_A_ALIGN                             ALIGN
> >> #define SYM_A_NONE                              /* nothing */
> >>
> >> Does it look OK now?
> > 
> > No, the patch changes alignment which is undesirable, it needs to preserve the 
> > existing (non-)alignment of the symbols!
> 
> OK, so I am not expressing myself explicitly enough, it seems.
> 
> So, correct, the patch v3 adds alignments. I suggested in the discussion
> the macros above. They do not add alignments. If everybody is OK with
> that, v4 of the patch won't add alignments. OK?

can we go back to what problem this patch set is trying to solve?
Sounds like you want to add _function_ start/end marks to aid debugging?
Debugging with what? What tool will recognize this stuff?

Take a look at what your patch does:
+ENTRY(sk_load_word)
        test    %esi,%esi
        js      bpf_slow_path_word_neg
+ENDPROC(sk_load_word)

Does above two assembler instructions look like a function?

or this:
+ENTRY(sk_load_byte_positive_offset)
        cmp     %esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
        jle     bpf_slow_path_byte
        movzbl  (SKBDATA,%rsi),%eax
        ret
+ENDPROC(sk_load_byte_positive_offset)

This assembler code doesn't represent functions. There is no prologue/epilogue
and no stack frame. JITed code uses 'call' insn to jump into them, but they're
not your typical C functions.
Take a look at bpf_slow_path_common() macro that creates the frame before
calling into C code with 'call skb_copy_bits;'

I still think that this code should be left alone.
Even macro names you're proposing:
 #define BPF_FUNC_START_LOCAL
don't sound right. These are not functions.
Jiri Slaby April 24, 2017, 5:51 p.m.
On 04/24/2017, 06:47 PM, Alexei Starovoitov wrote:
> On Mon, Apr 24, 2017 at 06:02:51PM +0200, Jiri Slaby wrote:
>> On 04/24/2017, 05:55 PM, Ingo Molnar wrote:
>>> * Jiri Slaby <jslaby@suse.cz> wrote:
>>>
>>>> On 04/24/2017, 05:08 PM, David Miller wrote:
>>>>> If you align the entry points, then the code sequence as a whole is
>>>>> are no longer densely packed.
>>>>
>>>> Sure.
>>>>
>>>>> Or do I misunderstand how your macros work?
>>>>
>>>> Perhaps. So the suggested macros for the code are:
>>>> #define BPF_FUNC_START_LOCAL(name) \
>>>> 		SYM_START(name, SYM_V_LOCAL, SYM_A_NONE)
>>>> #define BPF_FUNC_START(name) \
>>>> 		SYM_START(name, SYM_V_GLOBAL, SYM_A_NONE)
>>>>
>>>> and they differ from the standard ones:
>>>> #define SYM_FUNC_START_LOCAL(name)                      \
>>>>         SYM_START(name, SYM_V_LOCAL, SYM_A_ALIGN)
>>>> #define SYM_FUNC_START(name)                            \
>>>>         SYM_START(name, SYM_V_GLOBAL, SYM_A_ALIGN)
>>>>
>>>>
>>>> The difference is SYM_A_NONE vs. SYM_A_ALIGN, which means:
>>>> #define SYM_A_ALIGN                             ALIGN
>>>> #define SYM_A_NONE                              /* nothing */
>>>>
>>>> Does it look OK now?
>>>
>>> No, the patch changes alignment which is undesirable, it needs to preserve the 
>>> existing (non-)alignment of the symbols!
>>
>> OK, so I am not expressing myself explicitly enough, it seems.
>>
>> So, correct, the patch v3 adds alignments. I suggested in the discussion
>> the macros above. They do not add alignments. If everybody is OK with
>> that, v4 of the patch won't add alignments. OK?
> 
> can we go back to what problem this patch set is trying to solve?
> Sounds like you want to add _function_ start/end marks to aid debugging?
> Debugging with what? What tool will recognize this stuff?

objtool will generate DWARF debuginfo between every ENTRY and ENDPROC
(dubbed differently at the end of the series). DWARF is understood by
everything, including the kernel proper (we have DWARF unwinder in
SUSE's kernels available for decades).

> Take a look at what your patch does:
> +ENTRY(sk_load_word)
>         test    %esi,%esi
>         js      bpf_slow_path_word_neg
> +ENDPROC(sk_load_word)
> 
> Does above two assembler instructions look like a function?

Yes, you are right, the code is complete mess.

For example what's the point of making the sk_load_word_positive_offset
label a global, callable function? Note that this is exactly the reason
why this particular two hunks look weird to you even though the
annotations only mechanically paraphrase what is in the current code.

> or this:
> +ENTRY(sk_load_byte_positive_offset)
>         cmp     %esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
>         jle     bpf_slow_path_byte
>         movzbl  (SKBDATA,%rsi),%eax
>         ret
> +ENDPROC(sk_load_byte_positive_offset)
> 
> This assembler code doesn't represent functions. There is no prologue/epilogue
> and no stack frame. JITed code uses 'call' insn to jump into them, but they're
> not your typical C functions.

I am not looking for C functions everywhere in assembly, actually. (In
the ideal assembly, I would and in most cases I really am.) But you are
right the annotations of the current code aren't right. It results in my
annotations being wrong too -- based on invalid assumptions.

> Take a look at bpf_slow_path_common() macro that creates the frame before
> calling into C code with 'call skb_copy_bits;'
> 
> I still think that this code should be left alone.
> Even macro names you're proposing:
>  #define BPF_FUNC_START_LOCAL
> don't sound right. These are not functions.

Well, what is the reason to call them FUNC in the current code then?

thanks,
David Miller April 24, 2017, 6:24 p.m.
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 24 Apr 2017 19:51:54 +0200

> For example what's the point of making the sk_load_word_positive_offset
> label a global, callable function? Note that this is exactly the reason
> why this particular two hunks look weird to you even though the
> annotations only mechanically paraphrase what is in the current code.

So that it can be referenced by the eBPF JIT, because these are
helpers for eBPF JIT generated code.  Every architecture implementing
an eBPF JIT has this "mess".

You can't even put a tracepoint or kprobe on these things and expect
to see "arguments" or "return PC" values in the usual spots.  This
code has special calling conventions and register usage as Alexei
explained.

I would suggest that you read and understand how this assembler is
designed, how it is called from the generated JIT code, and what it's
semantics and register usage are, before trying to annotating it.

Thank you.
Jiri Slaby April 25, 2017, 2:41 p.m.
On 04/24/2017, 08:24 PM, David Miller wrote:
> From: Jiri Slaby <jslaby@suse.cz>
> Date: Mon, 24 Apr 2017 19:51:54 +0200
> 
>> For example what's the point of making the sk_load_word_positive_offset
>> label a global, callable function? Note that this is exactly the reason
>> why this particular two hunks look weird to you even though the
>> annotations only mechanically paraphrase what is in the current code.
> 
> So that it can be referenced by the eBPF JIT, because these are
> helpers for eBPF JIT generated code.  Every architecture implementing
> an eBPF JIT has this "mess".

I completely understand the needs for this, but I am complaining about
the way it is written. That is not the best -- unbalanced annotations, C
macros in lowercase (apart from that, C macros in .S need semicolons &
backslashes), FUNC macro, etc.

> You can't even put a tracepoint or kprobe on these things and expect
> to see "arguments" or "return PC" values in the usual spots.  This
> code has special calling conventions and register usage as Alexei
> explained.

Yes, I can see that.

> I would suggest that you read and understand how this assembler is
> designed, how it is called from the generated JIT code, and what it's
> semantics and register usage are, before trying to annotating it.

Of course I studied the code. I only missed macro CHOOSE_LOAD_FUNC which
I see now. So that answers why sk_load_word_positive_offset & similar
are marked as .globl.


But the original question I asked still remains: why do you mind calling
them BPF_FUNC_START & *_END, given:

1) the functions are marked by "FUNC" already:
$ git grep FUNC linus/master arch/x86/net/bpf_jit.S
linus/master:arch/x86/net/bpf_jit.S:#define FUNC(name) \
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_word)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_word_positive_offset)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_half)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_half_positive_offset)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_byte)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_byte_positive_offset)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_word_negative_offset)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_half_negative_offset)
linus/master:arch/x86/net/bpf_jit.S:FUNC(sk_load_byte_negative_offset)

2) they _are_ all callable from within the JIT code:
EMIT1_off32(0xE8, jmp_offset);

Yes, I fucked up the ENDs. They should be on different locations. But
the pieces are still functions from my POV and should be annotated
accordingly.

thanks,

Patch hide | download patch | download mbox

diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index f2a7faf4706e..762c29fb8832 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -23,16 +23,12 @@ 
 	32 /* space for rbx,r13,r14,r15 */ + \
 	8 /* space for skb_copy_bits */)
 
-#define FUNC(name) \
-	.globl name; \
-	.type name, @function; \
-	name:
-
-FUNC(sk_load_word)
+ENTRY(sk_load_word)
 	test	%esi,%esi
 	js	bpf_slow_path_word_neg
+ENDPROC(sk_load_word)
 
-FUNC(sk_load_word_positive_offset)
+ENTRY(sk_load_word_positive_offset)
 	mov	%r9d,%eax		# hlen
 	sub	%esi,%eax		# hlen - offset
 	cmp	$3,%eax
@@ -40,12 +36,14 @@  FUNC(sk_load_word_positive_offset)
 	mov     (SKBDATA,%rsi),%eax
 	bswap   %eax  			/* ntohl() */
 	ret
+ENDPROC(sk_load_word_positive_offset)
 
-FUNC(sk_load_half)
+ENTRY(sk_load_half)
 	test	%esi,%esi
 	js	bpf_slow_path_half_neg
+ENDPROC(sk_load_half)
 
-FUNC(sk_load_half_positive_offset)
+ENTRY(sk_load_half_positive_offset)
 	mov	%r9d,%eax
 	sub	%esi,%eax		#	hlen - offset
 	cmp	$1,%eax
@@ -53,16 +51,19 @@  FUNC(sk_load_half_positive_offset)
 	movzwl	(SKBDATA,%rsi),%eax
 	rol	$8,%ax			# ntohs()
 	ret
+ENDPROC(sk_load_half_positive_offset)
 
-FUNC(sk_load_byte)
+ENTRY(sk_load_byte)
 	test	%esi,%esi
 	js	bpf_slow_path_byte_neg
+ENDPROC(sk_load_byte)
 
-FUNC(sk_load_byte_positive_offset)
+ENTRY(sk_load_byte_positive_offset)
 	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
 	jle	bpf_slow_path_byte
 	movzbl	(SKBDATA,%rsi),%eax
 	ret
+ENDPROC(sk_load_byte_positive_offset)
 
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
@@ -119,31 +120,34 @@  bpf_slow_path_word_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
 	jl	bpf_error	/* offset lower -> error  */
 
-FUNC(sk_load_word_negative_offset)
+ENTRY(sk_load_word_negative_offset)
 	sk_negative_common(4)
 	mov	(%rax), %eax
 	bswap	%eax
 	ret
+ENDPROC(sk_load_word_negative_offset)
 
 bpf_slow_path_half_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi
 	jl	bpf_error
 
-FUNC(sk_load_half_negative_offset)
+ENTRY(sk_load_half_negative_offset)
 	sk_negative_common(2)
 	mov	(%rax),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
 	ret
+ENDPROC(sk_load_half_negative_offset)
 
 bpf_slow_path_byte_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi
 	jl	bpf_error
 
-FUNC(sk_load_byte_negative_offset)
+ENTRY(sk_load_byte_negative_offset)
 	sk_negative_common(1)
 	movzbl	(%rax), %eax
 	ret
+ENDPROC(sk_load_byte_negative_offset)
 
 bpf_error:
 # force a return 0 from jit handler