diff mbox series

um: vector: fix BPF loading in vector drivers

Message ID 20191128174405.4244-1-anton.ivanov@cambridgegreys.com
State Rejected
Headers show
Series um: vector: fix BPF loading in vector drivers | expand

Commit Message

Anton Ivanov Nov. 28, 2019, 5:44 p.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

This fixes a possible hang in bpf firmware loading in the
UML vector io drivers due to use of GFP_KERNEL while holding
a spinlock.

Based on a proposed fix by weiyongjun1@huawei.com and suggestions for
improving it by dan.carpenter@oracle.com

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/drivers/vector_kern.c | 38 ++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 18 deletions(-)

Comments

Daniel Borkmann Nov. 29, 2019, 9:15 a.m. UTC | #1
On 11/28/19 6:44 PM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> This fixes a possible hang in bpf firmware loading in the
> UML vector io drivers due to use of GFP_KERNEL while holding
> a spinlock.
> 
> Based on a prposed fix by weiyongjun1@huawei.com and suggestions for
> improving it by dan.carpenter@oracle.com
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Any reason why this BPF firmware loading mechanism in UML vector driver that was
recently added [0] is plain old classic BPF? Quoting your commit log [0]:

   All vector drivers now allow a BPF program to be loaded and
   associated with the RX socket in the host kernel.

   1. The program can be loaded as an extra kernel command line
   option to any of the vector drivers.

   2. The program can also be loaded as "firmware", using the
   ethtool flash option. It is possible to turn this facility
   on or off using a command line option.

   A simplistic wrapper for generating the BPF firmware for the raw
   socket driver out of a tcpdump/libpcap filter expression can be
   found at: https://github.com/kot-begemot-uk/uml_vector_utilities/

... it tells what it does but /nothing/ about the original rationale / use case
why it is needed. So what is the use case? And why is this only classic BPF? Is
there any discussion to read up on that led you to this decision of only implementing
handling for classic BPF?

I'm asking because classic BPF is /legacy/ stuff that is on feature freeze and
only very limited in terms of functionality compared to native (e)BPF which is
why you need this weird 'firmware' loader [1] which wraps around tcpdump to
parse the -ddd output into BPF insns ...

Thanks,
Daniel

   [0] https://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git/commit/?h=linux-next&id=9807019a62dc670c73ce8e59e09b41ae458c34b3
   [1] https://github.com/kot-begemot-uk/uml_vector_utilities/blob/master/build_bpf_firmware.py

>   arch/um/drivers/vector_kern.c | 38 ++++++++++++++++++-----------------
>   1 file changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
> index 92617e16829e..dbbc6e850fdd 100644
> --- a/arch/um/drivers/vector_kern.c
> +++ b/arch/um/drivers/vector_kern.c
> @@ -1387,6 +1387,7 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
>   	struct vector_private *vp = netdev_priv(dev);
>   	struct vector_device *vdevice;
>   	const struct firmware *fw;
> +	void *new_filter;
>   	int result = 0;
>   
>   	if (!(vp->options & VECTOR_BPF_FLASH)) {
> @@ -1394,6 +1395,15 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
>   		return -1;
>   	}
>   
> +	vdevice = find_device(vp->unit);
> +
> +	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
> +		return -1;
> +
> +	new_filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
> +	if (!new_filter)
> +		goto free_buffer;
> +
>   	spin_lock(&vp->lock);
>   
>   	if (vp->bpf != NULL) {
> @@ -1402,41 +1412,33 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
>   		kfree(vp->bpf->filter);
>   		vp->bpf->filter = NULL;
>   	} else {
> -		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
> +		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
>   		if (vp->bpf == NULL) {
>   			netdev_err(dev, "failed to allocate memory for firmware\n");
> -			goto flash_fail;
> +			goto apply_flash_fail;
>   		}
>   	}
>   
> -	vdevice = find_device(vp->unit);
> -
> -	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
> -		goto flash_fail;
> -
> -	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
> -	if (!vp->bpf->filter)
> -		goto free_buffer;
> -
> +	vp->bpf->filter = new_filter;
>   	vp->bpf->len = fw->size / sizeof(struct sock_filter);
> -	release_firmware(fw);
>   
>   	if (vp->opened)
>   		result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
>   
>   	spin_unlock(&vp->lock);
>   
> -	return result;
> -
> -free_buffer:
>   	release_firmware(fw);
>   
> -flash_fail:
> +	return result;
> +
> +apply_flash_fail:
>   	spin_unlock(&vp->lock);
> -	if (vp->bpf != NULL)
> +	if (vp->bpf)
>   		kfree(vp->bpf->filter);
>   	kfree(vp->bpf);
> -	vp->bpf = NULL;
> +
> +free_buffer:
> +	release_firmware(fw);
>   	return -1;
>   }
>   
>
Anton Ivanov Nov. 29, 2019, 11:54 a.m. UTC | #2
On 29/11/2019 09:15, Daniel Borkmann wrote:
> On 11/28/19 6:44 PM, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> This fixes a possible hang in bpf firmware loading in the
>> UML vector io drivers due to use of GFP_KERNEL while holding
>> a spinlock.
>>
>> Based on a prposed fix by weiyongjun1@huawei.com and suggestions for
>> improving it by dan.carpenter@oracle.com
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Any reason why this BPF firmware loading mechanism in UML vector driver 
> that was
> recently added [0] is plain old classic BPF? Quoting your commit log [0]:

It will allow whatever is allowed by sockfilter. Looking at the 
sockfilter implementation in the kernel it takes eBPF, however even the 
kernel docs still state BPF.

> 
>    All vector drivers now allow a BPF program to be loaded and
>    associated with the RX socket in the host kernel.
> 
>    1. The program can be loaded as an extra kernel command line
>    option to any of the vector drivers.
> 
>    2. The program can also be loaded as "firmware", using the
>    ethtool flash option. It is possible to turn this facility
>    on or off using a command line option.
> 
>    A simplistic wrapper for generating the BPF firmware for the raw
>    socket driver out of a tcpdump/libpcap filter expression can be
>    found at: https://github.com/kot-begemot-uk/uml_vector_utilities/
> 
> ... it tells what it does but /nothing/ about the original rationale / 
> use case
> why it is needed. So what is the use case? And why is this only classic 
> BPF? Is
> there any discussion to read up that lead you to this decision of only 
> implementing
> handling for classic BPF?

Moving processing out of the GUEST onto the HOST using a safe language. 
The firmware load is on the GUEST and your BPF is your virtual NIC 
"firmware" which runs on the HOST (in the host kernel in fact).

It is identical as an idea to what Netronome cards do in hardware.

> 
> I'm asking because classic BPF is /legacy/ stuff that is on feature 
> freeze and
> only very limited in terms of functionality compared to native (e)BPF 
> which is
> why you need this weird 'firmware' loader [1] which wraps around tcpdump to
> parse the -ddd output into BPF insns ...

Because there is no other mechanism of retrieving it after it is 
compiled by libpcap in any of the common scripting languages.

The pcap Perl, Python, Go (or whatever else) wrappers do not give you 
access to the compiled code after the filter has been compiled.

As for why that ingenious design was chosen - you have to take it up with their maintainers.

So if you want to start with pcap/tcpdump syntax and you do not want to 
rewrite that part of tcpdump as a dumper in C you have no other choice.

The starting point is chosen because the idea is at some point to 
replace the existing and very aged pcap network transport in UML. That 
takes pcap syntax on the kernel command line.

I admit it is a kludge, I will probably do the "do not want" bit and 
rewrite that in C.

In any case - the "loader" is only an example, you can compile BPF using 
LLVM or whatever else you like.

A.

> 
> Thanks,
> Daniel
> 
>    [0] 
> https://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git/commit/?h=linux-next&id=9807019a62dc670c73ce8e59e09b41ae458c34b3 
> 
>    [1] 
> https://github.com/kot-begemot-uk/uml_vector_utilities/blob/master/build_bpf_firmware.py 
> 
> 
>>   arch/um/drivers/vector_kern.c | 38 ++++++++++++++++++-----------------
>>   1 file changed, 20 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/um/drivers/vector_kern.c 
>> b/arch/um/drivers/vector_kern.c
>> index 92617e16829e..dbbc6e850fdd 100644
>> --- a/arch/um/drivers/vector_kern.c
>> +++ b/arch/um/drivers/vector_kern.c
>> @@ -1387,6 +1387,7 @@ static int vector_net_load_bpf_flash(struct 
>> net_device *dev,
>>       struct vector_private *vp = netdev_priv(dev);
>>       struct vector_device *vdevice;
>>       const struct firmware *fw;
>> +    void *new_filter;
>>       int result = 0;
>>       if (!(vp->options & VECTOR_BPF_FLASH)) {
>> @@ -1394,6 +1395,15 @@ static int vector_net_load_bpf_flash(struct 
>> net_device *dev,
>>           return -1;
>>       }
>> +    vdevice = find_device(vp->unit);
>> +
>> +    if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
>> +        return -1;
>> +
>> +    new_filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
>> +    if (!new_filter)
>> +        goto free_buffer;
>> +
>>       spin_lock(&vp->lock);
>>       if (vp->bpf != NULL) {
>> @@ -1402,41 +1412,33 @@ static int vector_net_load_bpf_flash(struct 
>> net_device *dev,
>>           kfree(vp->bpf->filter);
>>           vp->bpf->filter = NULL;
>>       } else {
>> -        vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
>> +        vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
>>           if (vp->bpf == NULL) {
>>               netdev_err(dev, "failed to allocate memory for 
>> firmware\n");
>> -            goto flash_fail;
>> +            goto apply_flash_fail;
>>           }
>>       }
>> -    vdevice = find_device(vp->unit);
>> -
>> -    if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
>> -        goto flash_fail;
>> -
>> -    vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
>> -    if (!vp->bpf->filter)
>> -        goto free_buffer;
>> -
>> +    vp->bpf->filter = new_filter;
>>       vp->bpf->len = fw->size / sizeof(struct sock_filter);
>> -    release_firmware(fw);
>>       if (vp->opened)
>>           result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
>>       spin_unlock(&vp->lock);
>> -    return result;
>> -
>> -free_buffer:
>>       release_firmware(fw);
>> -flash_fail:
>> +    return result;
>> +
>> +apply_flash_fail:
>>       spin_unlock(&vp->lock);
>> -    if (vp->bpf != NULL)
>> +    if (vp->bpf)
>>           kfree(vp->bpf->filter);
>>       kfree(vp->bpf);
>> -    vp->bpf = NULL;
>> +
>> +free_buffer:
>> +    release_firmware(fw);
>>       return -1;
>>   }
>>
> 
>
Daniel Borkmann Nov. 29, 2019, 11:12 p.m. UTC | #3
On 11/29/19 12:54 PM, Anton Ivanov wrote:
> On 29/11/2019 09:15, Daniel Borkmann wrote:
>> On 11/28/19 6:44 PM, anton.ivanov@cambridgegreys.com wrote:
>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> This fixes a possible hang in bpf firmware loading in the
>>> UML vector io drivers due to use of GFP_KERNEL while holding
>>> a spinlock.
>>>
>>> Based on a prposed fix by weiyongjun1@huawei.com and suggestions for
>>> improving it by dan.carpenter@oracle.com
>>>
>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Any reason why this BPF firmware loading mechanism in UML vector driver that was
>> recently added [0] is plain old classic BPF? Quoting your commit log [0]:
> 
> It will allow whatever is allowed by sockfilter. Looking at the sockfilter implementation in the kernel it takes eBPF, however even the kernel docs still state BPF.

You are using SO_ATTACH_FILTER in uml_vector_attach_bpf() which is the old classic
BPF (and not eBPF). The kernel internally moves that over to eBPF insns, but you'll
be constrained forever with the abilities of cBPF. The later added SO_ATTACH_BPF is
the one for eBPF where you pass the prog fd from bpf().

>>    All vector drivers now allow a BPF program to be loaded and
>>    associated with the RX socket in the host kernel.
>>
>>    1. The program can be loaded as an extra kernel command line
>>    option to any of the vector drivers.
>>
>>    2. The program can also be loaded as "firmware", using the
>>    ethtool flash option. It is possible to turn this facility
>>    on or off using a command line option.
>>
>>    A simplistic wrapper for generating the BPF firmware for the raw
>>    socket driver out of a tcpdump/libpcap filter expression can be
>>    found at: https://github.com/kot-begemot-uk/uml_vector_utilities/
>>
>> ... it tells what it does but /nothing/ about the original rationale / use case
>> why it is needed. So what is the use case? And why is this only classic BPF? Is
>> there any discussion to read up that lead you to this decision of only implementing
>> handling for classic BPF?
> 
> Moving processing out of the GUEST onto the HOST using a safe language. The firmware load is on the GUEST and your BPF is your virtual NIC "firmware" which runs on the HOST (in the host kernel in fact).
> 
> It is identical as an idea to what Netronome cards do in hardware.
> 
>> I'm asking because classic BPF is /legacy/ stuff that is on feature freeze and
>> only very limited in terms of functionality compared to native (e)BPF which is
>> why you need this weird 'firmware' loader [1] which wraps around tcpdump to
>> parse the -ddd output into BPF insns ...
> 
> Because there is no other mechanism of retrieving it after it is compiled by libpcap in any of the common scripting languages.
> 
> The pcap Perl, Python, Go (or whatever else) wrappers do not give you access to the compiled code after the filter has been compiled.
> 
> Why is that ingenious design - you have to take it with their maintainers.
> 
> So if you want to start with pcap/tcpdump syntax and you do not want to rewrite that part of tcpdump as a dumper in C you have no other choice.
> 
> The starting point is chosen because the idea is at some point to replace the existing and very aged pcap network transport in UML. That takes pcap syntax on the kernel command line.
> 
> I admit it is a kludge, I will probably do the "do not want" bit and rewrite that in C.

Yeah, it would probably be about the same # of LOC in C.

> In any case - the "loader" is only an example, you can compile BPF using LLVM or whatever else you like.

But did you try that with the code you have? Seems not, which is perhaps why there are some
wrong assumptions.

You can't use LLVM's BPF backend here since you only allow cBPF to be passed in, and LLVM emits
an object file with native eBPF insns (you could use libbpf (in-tree under tools/lib/bpf/)
for loading that).

> A.
Anton Ivanov Nov. 30, 2019, 7:29 a.m. UTC | #4
On 29/11/2019 23:12, Daniel Borkmann wrote:
> On 11/29/19 12:54 PM, Anton Ivanov wrote:
>> On 29/11/2019 09:15, Daniel Borkmann wrote:
>>> On 11/28/19 6:44 PM, anton.ivanov@cambridgegreys.com wrote:
>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>
>>>> This fixes a possible hang in bpf firmware loading in the
>>>> UML vector io drivers due to use of GFP_KERNEL while holding
>>>> a spinlock.
>>>>
>>>> Based on a prposed fix by weiyongjun1@huawei.com and suggestions for
>>>> improving it by dan.carpenter@oracle.com
>>>>
>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> Any reason why this BPF firmware loading mechanism in UML vector 
>>> driver that was
>>> recently added [0] is plain old classic BPF? Quoting your commit log 
>>> [0]:
>>
>> It will allow whatever is allowed by sockfilter. Looking at the 
>> sockfilter implementation in the kernel it takes eBPF, however even 
>> the kernel docs still state BPF.
> 
> You are using SO_ATTACH_FILTER in uml_vector_attach_bpf() which is the 
> old classic
> BPF (and not eBPF). The kernel internally moves that over to eBPF insns, 
> but you'll
> be constrained forever with the abilities of cBPF. The later added 
> SO_ATTACH_BPF is
> the one for eBPF where you pass the prog fd from bpf().

I will switch to that in the next version.

> 
>>>    All vector drivers now allow a BPF program to be loaded and
>>>    associated with the RX socket in the host kernel.
>>>
>>>    1. The program can be loaded as an extra kernel command line
>>>    option to any of the vector drivers.
>>>
>>>    2. The program can also be loaded as "firmware", using the
>>>    ethtool flash option. It is possible to turn this facility
>>>    on or off using a command line option.
>>>
>>>    A simplistic wrapper for generating the BPF firmware for the raw
>>>    socket driver out of a tcpdump/libpcap filter expression can be
>>>    found at: https://github.com/kot-begemot-uk/uml_vector_utilities/
>>>
>>> ... it tells what it does but /nothing/ about the original rationale 
>>> / use case
>>> why it is needed. So what is the use case? And why is this only 
>>> classic BPF? Is
>>> there any discussion to read up that lead you to this decision of 
>>> only implementing
>>> handling for classic BPF?
>>
>> Moving processing out of the GUEST onto the HOST using a safe 
>> language. The firmware load is on the GUEST and your BPF is your 
>> virtual NIC "firmware" which runs on the HOST (in the host kernel in 
>> fact).
>>
>> It is identical as an idea to what Netronome cards do in hardware.
>>
>>> I'm asking because classic BPF is /legacy/ stuff that is on feature 
>>> freeze and
>>> only very limited in terms of functionality compared to native (e)BPF 
>>> which is
>>> why you need this weird 'firmware' loader [1] which wraps around 
>>> tcpdump to
>>> parse the -ddd output into BPF insns ...
>>
>> Because there is no other mechanism of retrieving it after it is 
>> compiled by libpcap in any of the common scripting languages.
>>
>> The pcap Perl, Python, Go (or whatever else) wrappers do not give you 
>> access to the compiled code after the filter has been compiled.
>>
>> Why is that ingenious design - you have to take it with their 
>> maintainers.
>>
>> So if you want to start with pcap/tcpdump syntax and you do not want 
>> to rewrite that part of tcpdump as a dumper in C you have no other 
>> choice.
>>
>> The starting point is chosen because the idea is at some point to 
>> replace the existing and very aged pcap network transport in UML. That 
>> takes pcap syntax on the kernel command line.
>>
>> I admit it is a kludge, I will probably do the "do not want" bit and 
>> rewrite that in C.
> 
> Yeah, it would probably be about the same # of LOC in C.
> 
>> In any case - the "loader" is only an example, you can compile BPF 
>> using LLVM or whatever else you like.
> 
> But did you try that with the code you have? Seems not, which is perhaps 
> why there are some
> wrong assumptions.

All of my tests were done using bpf generated by tcpdump out of a pcap 
expression. So the answer is no - I did not try LLVM because I did not 
need to for what I was aiming to achieve.

The pcap route matches 1:1 existing functionality in the uml pcap driver 
as well as existing functionality in the vector drivers for the cases 
where they need to avoid seeing their own xmits and cannot use features 
like QDISC_BYPASS.

> 
> You can't use LLVM's BPF backend here since you only allow to pass in 
> cBPF, and LLVM emits
> an object file with native eBPF insns (you could use libbpf (in-tree 
> under tools/lib/bpf/)
> for loading that).

My initial aim was the same feature sets as pcap and achieve it using a 
virtual analogue of what cards like Netronome do - via the firmware route.

Switching to SO_ATTACH_BPF will come in the next revision.

A.

> 
>> A.
> 
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
Anton Ivanov Dec. 2, 2019, 9:33 a.m. UTC | #5
On 30/11/2019 07:29, Anton Ivanov wrote:
> On 29/11/2019 23:12, Daniel Borkmann wrote:
>> On 11/29/19 12:54 PM, Anton Ivanov wrote:
>>> On 29/11/2019 09:15, Daniel Borkmann wrote:
>>>> On 11/28/19 6:44 PM, anton.ivanov@cambridgegreys.com wrote:
>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>
>>>>> This fixes a possible hang in bpf firmware loading in the
>>>>> UML vector io drivers due to use of GFP_KERNEL while holding
>>>>> a spinlock.
>>>>>
>>>>> Based on a prposed fix by weiyongjun1@huawei.com and suggestions for
>>>>> improving it by dan.carpenter@oracle.com
>>>>>
>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>
>>>> Any reason why this BPF firmware loading mechanism in UML vector 
>>>> driver that was
>>>> recently added [0] is plain old classic BPF? Quoting your commit log 
>>>> [0]:
>>>
>>> It will allow whatever is allowed by sockfilter. Looking at the 
>>> sockfilter implementation in the kernel it takes eBPF, however even 
>>> the kernel docs still state BPF.
>>
>> You are using SO_ATTACH_FILTER in uml_vector_attach_bpf() which is the 
>> old classic
>> BPF (and not eBPF). The kernel internally moves that over to eBPF 
>> insns, but you'll
>> be constrained forever with the abilities of cBPF. The later added 
>> SO_ATTACH_BPF is
>> the one for eBPF where you pass the prog fd from bpf().
> 
> I will switch to that in the next version.
> 
>>
>>>>    All vector drivers now allow a BPF program to be loaded and
>>>>    associated with the RX socket in the host kernel.
>>>>
>>>>    1. The program can be loaded as an extra kernel command line
>>>>    option to any of the vector drivers.
>>>>
>>>>    2. The program can also be loaded as "firmware", using the
>>>>    ethtool flash option. It is possible to turn this facility
>>>>    on or off using a command line option.
>>>>
>>>>    A simplistic wrapper for generating the BPF firmware for the raw
>>>>    socket driver out of a tcpdump/libpcap filter expression can be
>>>>    found at: https://github.com/kot-begemot-uk/uml_vector_utilities/
>>>>
>>>> ... it tells what it does but /nothing/ about the original rationale 
>>>> / use case
>>>> why it is needed. So what is the use case? And why is this only 
>>>> classic BPF? Is
>>>> there any discussion to read up that lead you to this decision of 
>>>> only implementing
>>>> handling for classic BPF?
>>>
>>> Moving processing out of the GUEST onto the HOST using a safe 
>>> language. The firmware load is on the GUEST and your BPF is your 
>>> virtual NIC "firmware" which runs on the HOST (in the host kernel in 
>>> fact).
>>>
>>> It is identical as an idea to what Netronome cards do in hardware.
>>>
>>>> I'm asking because classic BPF is /legacy/ stuff that is on feature 
>>>> freeze and
>>>> only very limited in terms of functionality compared to native 
>>>> (e)BPF which is
>>>> why you need this weird 'firmware' loader [1] which wraps around 
>>>> tcpdump to
>>>> parse the -ddd output into BPF insns ...
>>>
>>> Because there is no other mechanism of retrieving it after it is 
>>> compiled by libpcap in any of the common scripting languages.
>>>
>>> The pcap Perl, Python, Go (or whatever else) wrappers do not give you 
>>> access to the compiled code after the filter has been compiled.
>>>
>>> Why is that ingenious design - you have to take it with their 
>>> maintainers.
>>>
>>> So if you want to start with pcap/tcpdump syntax and you do not want 
>>> to rewrite that part of tcpdump as a dumper in C you have no other 
>>> choice.
>>>
>>> The starting point is chosen because the idea is at some point to 
>>> replace the existing and very aged pcap network transport in UML. 
>>> That takes pcap syntax on the kernel command line.
>>>
>>> I admit it is a kludge, I will probably do the "do not want" bit and 
>>> rewrite that in C.
>>
>> Yeah, it would probably be about the same # of LOC in C.
>>
>>> In any case - the "loader" is only an example, you can compile BPF 
>>> using LLVM or whatever else you like.
>>
>> But did you try that with the code you have? Seems not, which is 
>> perhaps why there are some
>> wrong assumptions.
> 
> All of my tests were done using bpf generated by tcpdump out of a pcap 
> expression. So the answer is no - I did not try LLVM because I did not 
> need to for what I was aiming to achieve.
> 
> The pcap route matches 1:1 existing functionality in the uml pcap driver 
> as well as existing functionality in the vector drivers for the cases 
> where they need to avoid seeing their own xmits and cannot use features 
> like QDISC_BYPASS.
> 
>>
>> You can't use LLVM's BPF backend here since you only allow to pass in 
>> cBPF, and LLVM emits
>> an object file with native eBPF insns (you could use libbpf (in-tree 
>> under tools/lib/bpf/)
>> for loading that).
> 
> My initial aim was the same feature sets as pcap and achieve it using a 
> virtual analogue of what cards like Netronome do - via the firmware route.
> 
> Switching to SO_ATTACH_BPF will come in the next revision.
> 
> A.
> 
>>
>>> A.
>>
>> _______________________________________________
>> linux-um mailing list
>> linux-um@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-um
> 
> 
After reviewing what is needed for switching from SO_ATTACH_FILTER to 
SO_ATTACH_BPF, IMHO it will have to wait for a while.

1. I am not sticking yet another direct host syscall invocation into the 
userspace portion of the uml kernel and we cannot add extra userspace 
libraries like libbpf at present because it is not supported by kbuild.

I have a patch in the queue for that, but it will need to be approved by 
the kernel build people and merged before this can be done.

2. On top of that, in order to make use of eBPF for vNIC firmware 
properly, I will need to figure out the correct abstractions. The 
"program" part is quite clear - an eBPF program fits exactly into the 
role of virtual nic firmware - it is identical to classic BPF and the 
way it is used at present.

The maps, however, and how they go along with the "program firmware", 
are something which will need to be figured out. It may require a more 
complex load mechanism and a proper (not a 5-line wrapper around pcap or 
tcpdump) firmware packer/unpacker.

Once I have figured it out and it can fit into the kbuild, I will send 
the next revision. I suspect that it will happen at about the same time 
I will finish the AF_XDP UML vNIC transport (it has the same 
requirements, needs the same calls and uses the same libraries).
diff mbox series

Patch

diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 92617e16829e..dbbc6e850fdd 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1387,6 +1387,7 @@  static int vector_net_load_bpf_flash(struct net_device *dev,
 	struct vector_private *vp = netdev_priv(dev);
 	struct vector_device *vdevice;
 	const struct firmware *fw;
+	void *new_filter;
 	int result = 0;
 
 	if (!(vp->options & VECTOR_BPF_FLASH)) {
@@ -1394,6 +1395,15 @@  static int vector_net_load_bpf_flash(struct net_device *dev,
 		return -1;
 	}
 
+	vdevice = find_device(vp->unit);
+
+	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
+		return -1;
+
+	new_filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	if (!new_filter)
+		goto free_buffer;
+
 	spin_lock(&vp->lock);
 
 	if (vp->bpf != NULL) {
@@ -1402,41 +1412,33 @@  static int vector_net_load_bpf_flash(struct net_device *dev,
 		kfree(vp->bpf->filter);
 		vp->bpf->filter = NULL;
 	} else {
-		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
+		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
 		if (vp->bpf == NULL) {
 			netdev_err(dev, "failed to allocate memory for firmware\n");
-			goto flash_fail;
+			goto apply_flash_fail;
 		}
 	}
 
-	vdevice = find_device(vp->unit);
-
-	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
-		goto flash_fail;
-
-	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
-	if (!vp->bpf->filter)
-		goto free_buffer;
-
+	vp->bpf->filter = new_filter;
 	vp->bpf->len = fw->size / sizeof(struct sock_filter);
-	release_firmware(fw);
 
 	if (vp->opened)
 		result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
 
 	spin_unlock(&vp->lock);
 
-	return result;
-
-free_buffer:
 	release_firmware(fw);
 
-flash_fail:
+	return result;
+
+apply_flash_fail:
 	spin_unlock(&vp->lock);
-	if (vp->bpf != NULL)
+	if (vp->bpf)
 		kfree(vp->bpf->filter);
 	kfree(vp->bpf);
-	vp->bpf = NULL;
+
+free_buffer:
+	release_firmware(fw);
 	return -1;
 }