diff mbox

[v2,net-next] bpf: fix bpf_perf_event_read() helper

Message ID 1445468283-4592-1-git-send-email-ast@kernel.org
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Alexei Starovoitov Oct. 21, 2015, 10:58 p.m. UTC
Fix safety checks for bpf_perf_event_read():
- only non-inherited events can be added to perf_event_array map
  (do this check statically at map insertion time)
- dynamically check that event is local and !pmu->count
Otherwise buggy bpf program can cause kernel splat.

Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
v1->v2: fix compile in case of !CONFIG_PERF_EVENTS

This patch is on top of
http://patchwork.ozlabs.org/patch/533585/
to avoid conflicts.
Even in the worst case the crash is not possible.
Only warn_on_once, so imo net-next is ok.

 kernel/bpf/arraymap.c |    9 +++++----
 kernel/events/core.c  |   16 ++++++++++------
 2 files changed, 15 insertions(+), 10 deletions(-)

Comments

Wangnan (F) Oct. 22, 2015, 4:49 a.m. UTC | #1
After applying this patch, I can no longer use perf to pass a perf_event
like this:

  # perf record -a -e evt=cycles -e 
./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls

With -v it outputs:

...
adding perf_bpf_probe:func_write
adding perf_bpf_probe:func_write to 0x367d6a0
add bpf event perf_bpf_probe:func_write_return and attach bpf program 6
adding perf_bpf_probe:func_write_return
adding perf_bpf_probe:func_write_return to 0x3a7fc40
mmap size 528384B
ERROR: failed to insert value to pmu_map[0]
ERROR: Apply config to BPF failed: Invalid option for map, add -v to see 
detail
Opening /sys/kernel/debug/tracing//kprobe_events write=
...

Looks like perf sets attr.inherit for cycles? I'll look into this problem.

Thank you.

On 2015/10/22 6:58, Alexei Starovoitov wrote:
> Fix safety checks for bpf_perf_event_read():
> - only non-inherited events can be added to perf_event_array map
>    (do this check statically at map insertion time)
> - dynamically check that event is local and !pmu->count
> Otherwise buggy bpf program can cause kernel splat.
>
> Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter")
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
> v1->v2: fix compile in case of !CONFIG_PERF_EVENTS
>
> This patch is on top of
> http://patchwork.ozlabs.org/patch/533585/
> to avoid conflicts.
> Even in the worst case the crash is not possible.
> Only warn_on_once, so imo net-next is ok.
>
>   kernel/bpf/arraymap.c |    9 +++++----
>   kernel/events/core.c  |   16 ++++++++++------
>   2 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index e3cfe46b074f..75529cc94304 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
>   	if (IS_ERR(attr))
>   		return (void *)attr;
>   
> -	if (attr->type != PERF_TYPE_RAW &&
> -	    !(attr->type == PERF_TYPE_SOFTWARE &&
> -	      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
> -	    attr->type != PERF_TYPE_HARDWARE) {
> +	if ((attr->type != PERF_TYPE_RAW &&
> +	     !(attr->type == PERF_TYPE_SOFTWARE &&
> +	       attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
> +	     attr->type != PERF_TYPE_HARDWARE) ||
> +	    attr->inherit) {
>   		perf_event_release_kernel(event);
>   		return ERR_PTR(-EINVAL);
>   	}
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 64754bfecd70..0b6333265872 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event)
>   u64 perf_event_read_local(struct perf_event *event)
>   {
>   	unsigned long flags;
> -	u64 val;
> +	u64 val = -EINVAL;
>   
>   	/*
>   	 * Disabling interrupts avoids all counter scheduling (context
> @@ -3267,12 +3267,14 @@ u64 perf_event_read_local(struct perf_event *event)
>   	local_irq_save(flags);
>   
>   	/* If this is a per-task event, it must be for current */
> -	WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
> -		     event->hw.target != current);
> +	if ((event->attach_state & PERF_ATTACH_TASK) &&
> +	    event->hw.target != current)
> +		goto out;
>   
>   	/* If this is a per-CPU event, it must be for this CPU */
> -	WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
> -		     event->cpu != smp_processor_id());
> +	if (!(event->attach_state & PERF_ATTACH_TASK) &&
> +	    event->cpu != smp_processor_id())
> +		goto out;
>   
>   	/*
>   	 * It must not be an event with inherit set, we cannot read
> @@ -3284,7 +3286,8 @@ u64 perf_event_read_local(struct perf_event *event)
>   	 * It must not have a pmu::count method, those are not
>   	 * NMI safe.
>   	 */
> -	WARN_ON_ONCE(event->pmu->count);
> +	if (event->pmu->count)
> +		goto out;
>   
>   	/*
>   	 * If the event is currently on this CPU, its either a per-task event,
> @@ -3295,6 +3298,7 @@ u64 perf_event_read_local(struct perf_event *event)
>   		event->pmu->read(event);
>   
>   	val = local64_read(&event->count);
> +out:
>   	local_irq_restore(flags);
>   
>   	return val;


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexei Starovoitov Oct. 22, 2015, 5 a.m. UTC | #2
On 10/21/15 9:49 PM, Wangnan (F) wrote:
> After applying this patch I'm unable to use perf passing perf_event
> again like this:

please do not top post and trim your replies.

>   # perf record -a -e evt=cycles -e
> ./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls
>
> With -v it output:
>
> ...
> adding perf_bpf_probe:func_write
> adding perf_bpf_probe:func_write to 0x367d6a0
> add bpf event perf_bpf_probe:func_write_return and attach bpf program 6
> adding perf_bpf_probe:func_write_return
> adding perf_bpf_probe:func_write_return to 0x3a7fc40
> mmap size 528384B
> ERROR: failed to insert value to pmu_map[0]
> ERROR: Apply config to BPF failed: Invalid option for map, add -v to see
> detail
> Opening /sys/kernel/debug/tracing//kprobe_events write=
> ...
>
> Looks like perf sets attr.inherit for cycles? I'll look into this problem.

yes. that's perf default.
How did it even work before?!
I was testing with your samples/bpf/tracex6 that sets inherit to zero.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wangnan (F) Oct. 22, 2015, 5:30 a.m. UTC | #3
On 2015/10/22 13:00, Alexei Starovoitov wrote:
> On 10/21/15 9:49 PM, Wangnan (F) wrote:
>> After applying this patch I'm unable to use perf passing perf_event
>> again like this:
>
> please do not top post and trim your replies.
>
>>   # perf record -a -e evt=cycles -e
>> ./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls
>>
>> With -v it output:
>>
>> ...
>> adding perf_bpf_probe:func_write
>> adding perf_bpf_probe:func_write to 0x367d6a0
>> add bpf event perf_bpf_probe:func_write_return and attach bpf program 6
>> adding perf_bpf_probe:func_write_return
>> adding perf_bpf_probe:func_write_return to 0x3a7fc40
>> mmap size 528384B
>> ERROR: failed to insert value to pmu_map[0]
>> ERROR: Apply config to BPF failed: Invalid option for map, add -v to see
>> detail
>> Opening /sys/kernel/debug/tracing//kprobe_events write=
>> ...
>>
>> Looks like perf sets attr.inherit for cycles? I'll look into this 
>> problem.
>
> yes. that's perf default.
> How did it even work before?!
> I was testing with your samples/bpf/tracex6 that sets inherit to zero.
>

Tested perf record -i option and it works for me:

# echo "" > /sys/kernel/debug/tracing/trace
# perf record -i -a -e evt=cycles -e 
./test_config_map.c/maps.pmu_map.event=evt/ --exclude-perf ls
# cat /sys/kernel/debug/tracing/trace  | grep ls
               ls-8227  [001] dN..  2526.184611: : pmu inc: 82270
               ls-8227  [001] dN..  2526.184626: : pmu inc: 40951
               ls-8227  [001] dN..  2526.184642: : pmu inc: 50659
               ls-8227  [001] dN..  2526.184657: : pmu inc: 43511
               ls-8227  [001] dN..  2526.184675: : pmu inc: 56921
...
And no warning message was found in dmesg.

So I think your fix is good, we should improve perf.

Thank you.




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wangnan (F) Oct. 22, 2015, 5:31 a.m. UTC | #4
On 2015/10/22 6:58, Alexei Starovoitov wrote:
> Fix safety checks for bpf_perf_event_read():
> - only non-inherited events can be added to perf_event_array map
>    (do this check statically at map insertion time)
> - dynamically check that event is local and !pmu->count
> Otherwise buggy bpf program can cause kernel splat.
>
> Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter")
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
> v1->v2: fix compile in case of !CONFIG_PERF_EVENTS
>
> This patch is on top of
> http://patchwork.ozlabs.org/patch/533585/
> to avoid conflicts.
> Even in the worst case the crash is not possible.
> Only warn_on_once, so imo net-next is ok.
>
>   kernel/bpf/arraymap.c |    9 +++++----
>   kernel/events/core.c  |   16 ++++++++++------
>   2 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index e3cfe46b074f..75529cc94304 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
>   	if (IS_ERR(attr))
>   		return (void *)attr;
>   
> -	if (attr->type != PERF_TYPE_RAW &&
> -	    !(attr->type == PERF_TYPE_SOFTWARE &&
> -	      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
> -	    attr->type != PERF_TYPE_HARDWARE) {
> +	if ((attr->type != PERF_TYPE_RAW &&
> +	     !(attr->type == PERF_TYPE_SOFTWARE &&
> +	       attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
> +	     attr->type != PERF_TYPE_HARDWARE) ||
> +	    attr->inherit) {

This 'if' statement is quite complex. What about using an inline function
instead?

Thank you.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexei Starovoitov Oct. 22, 2015, 6:21 a.m. UTC | #5
On 10/21/15 10:31 PM, Wangnan (F) wrote:
>> +    if ((attr->type != PERF_TYPE_RAW &&
>> +         !(attr->type == PERF_TYPE_SOFTWARE &&
>> +           attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
>> +         attr->type != PERF_TYPE_HARDWARE) ||
>> +        attr->inherit) {
>
> This 'if' statement is so complex. What about using a inline function
> instead?

hmm. don't see how inline function will help readability.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wangnan (F) Oct. 22, 2015, 7:39 a.m. UTC | #6
On 2015/10/22 14:21, Alexei Starovoitov wrote:
> On 10/21/15 10:31 PM, Wangnan (F) wrote:
>>> +    if ((attr->type != PERF_TYPE_RAW &&
>>> +         !(attr->type == PERF_TYPE_SOFTWARE &&
>>> +           attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
>>> +         attr->type != PERF_TYPE_HARDWARE) ||
>>> +        attr->inherit) {
>>
>> This 'if' statement is so complex. What about using a inline function
>> instead?
>
> hmm. don't see how inline function will help readability.
>

For example (not tested):

  static inline bool perf_event_can_insert_to_map(struct perf_event_attr 
*attr)
  {
     /* is inherit? */
     if (attr->inherit)
         return false;

     /* is software event? */
     if (attr->type == PERF_TYPE_SOFTWARE)
         if (attr->config == PERF_COUNT_SW_BPF_OUTPUT)
             return true;
         else
             return false;

     /* Comment... */
     if (attr->type == PERF_TYPE_RAW)
         return true;
     if (attr->type == PERF_TYPE_HARDWARE)
         return true;
     return false;
  }

  ...
  if (!perf_event_can_insert_to_map(attr))
     ....

Do you think readability is improved?

Thank you.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wangnan (F) Oct. 22, 2015, 12:30 p.m. UTC | #7
On 2015/10/22 6:58, Alexei Starovoitov wrote:

[SNIP]
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index e3cfe46b074f..75529cc94304 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -294,10 +294,11 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
>   	if (IS_ERR(attr))
>   		return (void *)attr;
>   
> -	if (attr->type != PERF_TYPE_RAW &&
> -	    !(attr->type == PERF_TYPE_SOFTWARE &&
> -	      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
> -	    attr->type != PERF_TYPE_HARDWARE) {
> +	if ((attr->type != PERF_TYPE_RAW &&
> +	     !(attr->type == PERF_TYPE_SOFTWARE &&
> +	       attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
> +	     attr->type != PERF_TYPE_HARDWARE) ||
> +	    attr->inherit) {
>   		perf_event_release_kernel(event);
>   		return ERR_PTR(-EINVAL);
>   	}

I have a question on inherit, not related to this patch:
Is it safe for perf to disable attr->inherit if the event is system wide?
I haven't read the related code completely. As far as I know, the behavior
of a system-wide perf event should be the same whether inherit is set or not.
Is that true?

Thank you.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra Oct. 22, 2015, 12:41 p.m. UTC | #8
On Thu, Oct 22, 2015 at 08:30:36PM +0800, Wangnan (F) wrote:
> I have a question on inherit, not related to this patch:
> Is it safe for perf to disable attr->inherit if the event is system wide?
> I haven't read relate code completely. In my current knowledge the behavior
> of a system wide perf event should be same whether inherit is set or not.
> Is that true?

Yes, .inherit is pointless for cpu wide events; if we allow creating cpu
events with .inherit set, that's unfortunate.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra Oct. 22, 2015, 1:57 p.m. UTC | #9
On Wed, Oct 21, 2015 at 03:58:03PM -0700, Alexei Starovoitov wrote:
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 64754bfecd70..0b6333265872 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event)
>  u64 perf_event_read_local(struct perf_event *event)
>  {
>  	unsigned long flags;
> -	u64 val;
> +	u64 val = -EINVAL;

No, you cannot do this, -EINVAL is a valid count value. You simply must
not call this function on !local events, ever.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexei Starovoitov Oct. 22, 2015, 3:51 p.m. UTC | #10
On 10/22/15 12:39 AM, Wangnan (F) wrote:
>   ...
>   if (!perf_event_can_insert_to_map(attr))
>      ....
>
> Do you think redability is improved?

yes. makes sense. will respin.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexei Starovoitov Oct. 22, 2015, 3:53 p.m. UTC | #11
On 10/22/15 6:57 AM, Peter Zijlstra wrote:
> On Wed, Oct 21, 2015 at 03:58:03PM -0700, Alexei Starovoitov wrote:
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 64754bfecd70..0b6333265872 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -3258,7 +3258,7 @@ static inline u64 perf_event_count(struct perf_event *event)
>>   u64 perf_event_read_local(struct perf_event *event)
>>   {
>>   	unsigned long flags;
>> -	u64 val;
>> +	u64 val = -EINVAL;
>
> No, you cannot do this, -EINVAL is a valid count value. You simply must
> not call this function on !local events, ever.

agree. Will keep perf_event_read_local() as-is and do all safety
checks on bpf side.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e3cfe46b074f..75529cc94304 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -294,10 +294,11 @@  static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 	if (IS_ERR(attr))
 		return (void *)attr;
 
-	if (attr->type != PERF_TYPE_RAW &&
-	    !(attr->type == PERF_TYPE_SOFTWARE &&
-	      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
-	    attr->type != PERF_TYPE_HARDWARE) {
+	if ((attr->type != PERF_TYPE_RAW &&
+	     !(attr->type == PERF_TYPE_SOFTWARE &&
+	       attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
+	     attr->type != PERF_TYPE_HARDWARE) ||
+	    attr->inherit) {
 		perf_event_release_kernel(event);
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 64754bfecd70..0b6333265872 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3258,7 +3258,7 @@  static inline u64 perf_event_count(struct perf_event *event)
 u64 perf_event_read_local(struct perf_event *event)
 {
 	unsigned long flags;
-	u64 val;
+	u64 val = -EINVAL;
 
 	/*
 	 * Disabling interrupts avoids all counter scheduling (context
@@ -3267,12 +3267,14 @@  u64 perf_event_read_local(struct perf_event *event)
 	local_irq_save(flags);
 
 	/* If this is a per-task event, it must be for current */
-	WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
-		     event->hw.target != current);
+	if ((event->attach_state & PERF_ATTACH_TASK) &&
+	    event->hw.target != current)
+		goto out;
 
 	/* If this is a per-CPU event, it must be for this CPU */
-	WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
-		     event->cpu != smp_processor_id());
+	if (!(event->attach_state & PERF_ATTACH_TASK) &&
+	    event->cpu != smp_processor_id())
+		goto out;
 
 	/*
 	 * It must not be an event with inherit set, we cannot read
@@ -3284,7 +3286,8 @@  u64 perf_event_read_local(struct perf_event *event)
 	 * It must not have a pmu::count method, those are not
 	 * NMI safe.
 	 */
-	WARN_ON_ONCE(event->pmu->count);
+	if (event->pmu->count)
+		goto out;
 
 	/*
 	 * If the event is currently on this CPU, its either a per-task event,
@@ -3295,6 +3298,7 @@  u64 perf_event_read_local(struct perf_event *event)
 		event->pmu->read(event);
 
 	val = local64_read(&event->count);
+out:
 	local_irq_restore(flags);
 
 	return val;