
[v3,bpf-next,1/4] tracing/probe: Add PERF_EVENT_IOC_QUERY_PROBE ioctl

Message ID 20190816223149.5714-2-dxu@dxuuu.xyz
State Changes Requested
Delegated to: BPF Maintainers
Series tracing/probe: Add PERF_EVENT_IOC_QUERY_PROBE

Commit Message

Daniel Xu Aug. 16, 2019, 10:31 p.m. UTC
It's useful to know [uk]probe's nmissed and nhit stats. For example with
tracing tools, it's important to know when events may have been lost.
debugfs currently exposes a control file to get this information, but
it is not compatible with probes registered with the perf API.

While bpf programs may be able to manually count nhit, there is no way
to gather nmissed. In other words, it is currently not possible to
retrieve information about FD-based probes.

This patch adds a new ioctl that lets users query nmissed (as well as
nhit for completeness). We currently only add support for [uk]probes
but leave the possibility open for other probes like tracepoint.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 include/linux/trace_events.h    | 12 ++++++++++++
 include/uapi/linux/perf_event.h | 23 +++++++++++++++++++++++
 kernel/events/core.c            | 20 ++++++++++++++++++++
 kernel/trace/trace_kprobe.c     | 24 ++++++++++++++++++++++++
 kernel/trace/trace_uprobe.c     | 24 ++++++++++++++++++++++++
 5 files changed, 103 insertions(+)
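
For reference, a minimal userspace sketch of how the proposed ioctl could be
driven. This is illustrative only: struct perf_event_query_probe and
PERF_EVENT_IOC_QUERY_PROBE come from this patch (not from mainline headers),
and pfd is assumed to be an fd returned by perf_event_open() for a
perf_kprobe or perf_uprobe event.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* Query hit/miss counters on an already-opened FD-based probe event. */
static int query_probe_counts(int pfd)
{
	struct perf_event_query_probe q;

	memset(&q, 0, sizeof(q));
	q.size = sizeof(q);	/* tell the kernel how much we can accept */

	if (ioctl(pfd, PERF_EVENT_IOC_QUERY_PROBE, &q))
		return -1;

	printf("nhit=%llu nmissed=%llu\n",
	       (unsigned long long)q.nhit,
	       (unsigned long long)q.nmissed);
	return 0;
}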

Comments

Alexei Starovoitov Aug. 20, 2019, 1:26 a.m. UTC | #1
On Fri, Aug 16, 2019 at 3:33 PM Daniel Xu <dxu@dxuuu.xyz> wrote:
>
> It's useful to know [uk]probe's nmissed and nhit stats. For example with
> tracing tools, it's important to know when events may have been lost.
> debugfs currently exposes a control file to get this information, but
> it is not compatible with probes registered with the perf API.
>
> While bpf programs may be able to manually count nhit, there is no way
> to gather nmissed. In other words, it is currently not possible to
> retrieve information about FD-based probes.
>
> This patch adds a new ioctl that lets users query nmissed (as well as
> nhit for completeness). We currently only add support for [uk]probes
> but leave the possibility open for other probes like tracepoint.
>
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
...
> +int perf_kprobe_event_query(struct perf_event *event, void __user *info)
> +{
> +       struct perf_event_query_probe __user *uquery = info;
> +       struct perf_event_query_probe query = {};
> +       struct trace_event_call *call = event->tp_event;
> +       struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> +       u64 ncopy;
> +
> +       if (!capable(CAP_SYS_ADMIN))
> +               return -EPERM;
> +       if (copy_from_user(&query, uquery,
> +                          offsetofend(struct perf_event_query_probe, size)))
> +               return -EFAULT;
> +
> +       ncopy = min_t(u64, query.size, sizeof(query));
> +       query.nhit = trace_kprobe_nhit(tk);
> +       query.nmissed = tk->rp.kp.nmissed;
> +
> +       if (copy_to_user(uquery, &query, ncopy))
> +               return -EFAULT;

shouldn't the kernel update query.size before copying back?
Otherwise, how would user space know which fields
were populated?
Daniel Xu Aug. 20, 2019, 2:34 a.m. UTC | #2
On Mon Aug 19, 2019 at 6:26 PM Alexei Starovoitov wrote:
> On Fri, Aug 16, 2019 at 3:33 PM Daniel Xu <dxu@dxuuu.xyz> wrote:
> >
> > It's useful to know [uk]probe's nmissed and nhit stats. For example with
> > tracing tools, it's important to know when events may have been lost.
> > debugfs currently exposes a control file to get this information, but
> > it is not compatible with probes registered with the perf API.
> >
> > While bpf programs may be able to manually count nhit, there is no way
> > to gather nmissed. In other words, it is currently not possible to
> > retrieve information about FD-based probes.
> >
> > This patch adds a new ioctl that lets users query nmissed (as well as
> > nhit for completeness). We currently only add support for [uk]probes
> > but leave the possibility open for other probes like tracepoint.
> >
> > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> ...
> > +int perf_kprobe_event_query(struct perf_event *event, void __user *info)
> > +{
> > +       struct perf_event_query_probe __user *uquery = info;
> > +       struct perf_event_query_probe query = {};
> > +       struct trace_event_call *call = event->tp_event;
> > +       struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> > +       u64 ncopy;
> > +
> > +       if (!capable(CAP_SYS_ADMIN))
> > +               return -EPERM;
> > +       if (copy_from_user(&query, uquery,
> > +                          offsetofend(struct perf_event_query_probe, size)))
> > +               return -EFAULT;
> > +
> > +       ncopy = min_t(u64, query.size, sizeof(query));
> > +       query.nhit = trace_kprobe_nhit(tk);
> > +       query.nmissed = tk->rp.kp.nmissed;
> > +
> > +       if (copy_to_user(uquery, &query, ncopy))
> > +               return -EFAULT;
> 
> shouldn't the kernel update query.size before copying back?
> Otherwise, how would user space know which fields
> were populated?

Ah yes, sorry. Will add that.
Alexei Starovoitov Aug. 20, 2019, 2:52 a.m. UTC | #3
On Mon, Aug 19, 2019 at 7:34 PM Daniel Xu <dxu@dxuuu.xyz> wrote:
>
> Ah yes, sorry. Will add that.

Also, please fix the build errors.
It looks like buildbot is not happy about a few things.
Peter Zijlstra Aug. 20, 2019, 2:45 p.m. UTC | #4
On Fri, Aug 16, 2019 at 03:31:46PM -0700, Daniel Xu wrote:
> It's useful to know [uk]probe's nmissed and nhit stats. For example with
> tracing tools, it's important to know when events may have been lost.
> debugfs currently exposes a control file to get this information, but
> it is not compatible with probes registered with the perf API.

What is this nmissed and nhit stuff?
Daniel Xu Aug. 20, 2019, 5:58 p.m. UTC | #5
Hi Peter,

On Tue Aug 20, 2019 at 4:45 PM Peter Zijlstra wrote:
> On Fri, Aug 16, 2019 at 03:31:46PM -0700, Daniel Xu wrote:
> > It's useful to know [uk]probe's nmissed and nhit stats. For example with
> > tracing tools, it's important to know when events may have been lost.
> > debugfs currently exposes a control file to get this information, but
> > it is not compatible with probes registered with the perf API.
> 
> What is this nmissed and nhit stuff?

nmissed is the number of times the probe's handler should have run
but didn't. nhit is the number of times the probe's handler has run. I've
documented this information in the uapi header. If you'd like, I can put
it in the commit message too.

Daniel
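
As context for the debugfs interface mentioned above, a rough sketch of how
these counters can be read today without the perf API. Assumptions: tracefs
is mounted at the usual debugfs path and kprobe_profile uses a
"name  nhit  nmissed" column layout; both may differ by kernel and config,
and this route does not work for FD-based probes, which is the gap the patch
addresses.

#include <stdio.h>

static void dump_kprobe_profile(void)
{
	char name[128];
	unsigned long nhit, nmissed;
	FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_profile", "r");

	if (!f)
		return;
	/* one line per registered kprobe event */
	while (fscanf(f, "%127s %lu %lu", name, &nhit, &nmissed) == 3)
		printf("%s: nhit=%lu nmissed=%lu\n", name, nhit, nmissed);
	fclose(f);
}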
Peter Zijlstra Aug. 21, 2019, 11:08 a.m. UTC | #6
On Tue, Aug 20, 2019 at 10:58:47AM -0700, Daniel Xu wrote:
> Hi Peter,
> 
> On Tue Aug 20, 2019 at 4:45 PM Peter Zijlstra wrote:
> > On Fri, Aug 16, 2019 at 03:31:46PM -0700, Daniel Xu wrote:
> > > It's useful to know [uk]probe's nmissed and nhit stats. For example with
> > > tracing tools, it's important to know when events may have been lost.
> > > debugfs currently exposes a control file to get this information, but
> > > it is not compatible with probes registered with the perf API.
> > 
> > What is this nmissed and nhit stuff?
> 
> nmissed is the number of times the probe's handler should have run
> but didn't. nhit is the number of times the probe's handler has run. I've
> documented this information in the uapi header. If you'd like, I can put
> it in the commit message too.

That comment just says: 'number of times this probe was temporarily
disabled', which says exactly nothing.

But reading the kprobe code seems to suggest this happens on recursive
kprobes, which I'm thinking is a dodgy situation in the first place.

ftrace and perf in general don't keep counts of events lost due to
recursion, so why should we do this for kprobes? Also, while you write
to support uprobes, it doesn't actually suffer from this (it cannot,
uprobes cannot recurse), so supporting it makes no sense.

And with that, the name QUERY_PROBE also makes no sense, because it is
not specific to [uk]probes; all software events suffer from this.

And I'm not sure an additional ioctl() is the right way, supposing we
want to expose this at all. You've mentioned no alternative approaches;
I'm thinking PERF_FORMAT_LOST might be possible, or maybe a
PERF_RECORD_LOST extension.

Of course, then you get to implement it for tracepoints and software
events too.
Yonghong Song Aug. 21, 2019, 4:54 p.m. UTC | #7
On 8/21/19 4:08 AM, Peter Zijlstra wrote:
> On Tue, Aug 20, 2019 at 10:58:47AM -0700, Daniel Xu wrote:
>> Hi Peter,
>>
>> On Tue Aug 20, 2019 at 4:45 PM Peter Zijlstra wrote:
>>> On Fri, Aug 16, 2019 at 03:31:46PM -0700, Daniel Xu wrote:
>>>> It's useful to know [uk]probe's nmissed and nhit stats. For example with
>>>> tracing tools, it's important to know when events may have been lost.
>>>> debugfs currently exposes a control file to get this information, but
>>>> it is not compatible with probes registered with the perf API.
>>>
>>> What is this nmissed and nhit stuff?
>>
>> nmissed is the number of times the probe's handler should have run
>> but didn't. nhit is the number of times the probe's handler has run. I've
>> documented this information in the uapi header. If you'd like, I can put
>> it in the commit message too.
> 
> That comment just says: 'number of times this probe was temporarily
> disabled', which says exactly nothing.
> 
> But reading the kprobe code seems to suggest this happens on recursive
> kprobes, which I'm thinking is a dodgy situation in the first place.
> 
> ftrace and perf in general don't keep counts of events lost due to
> recursion, so why should we do this for kprobes? Also, while you write
> to support uprobes, it doesn't actually suffer from this (it cannot,
> uprobes cannot recurse), so supporting it makes no sense.
> 
> And with that, the name QUERY_PROBE also makes no sense, because it is
> not specific to [uk]probes; all software events suffer from this.
> 
> And I'm not sure an additional ioctl() is the right way, supposing we
> want to expose this at all. You've mentioned no alternative approaches;
> I'm thinking PERF_FORMAT_LOST might be possible, or maybe a
> PERF_RECORD_LOST extension.

Things get more complicated when a bpf program executes in place of
ring buffer output.

Currently, in kernel/trace/bpf_trace.c, we have

unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
         unsigned int ret;

         if (in_nmi()) /* not supported yet */
                 return 1;

         preempt_disable();

         if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
                 /*
                  * since some bpf program is already running on this cpu,
                  * don't call into another bpf program (same or different)
                  * and don't send kprobe event into ring-buffer,
                  * so return zero here
                  */
                 ret = 0;
                 goto out;
         }
.....

In the above, events with a bpf program attached will be missed
if the context is an NMI, or if some recursion happens, even with
the same or different bpf programs.
In case of recursion, the events will not be sent to the ring buffer.

A lot of bpf-based tracing programs use maps to communicate and
do not allocate a ring buffer at all.

Maybe we can still use the ioctl-based approach, which is lightweight
compared to the ring buffer approach? If an fd has bpf attached, nhit/nmissed
indicates whether the kprobe was processed by the bpf program or not.

Currently, for debugfs, the nhit/nmissed info is exposed at
{k|u}probe_profile. Alternatively, we could expose nhit/nmissed
in /proc/self/fdinfo/<fd>. Users could query this interface to
get the numbers.

Arnaldo has a question on the bcc mailing list about hit/miss
counting for events that the bpf program missed processing.

https://lists.iovisor.org/g/iovisor-dev/message/1783

Comments?


> 
> Of course, then you get to implement it for tracepoints and software
> events too.
>
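
As an aside, the "manually count nhit" approach mentioned in the commit
message usually looks roughly like the libbpf-style sketch below (the map
name and the kprobe attach point are made up for illustration). Note that
this still cannot observe events dropped before the program runs (NMI
context, the bpf_prog_active recursion check quoted above), which is exactly
the information under discussion here.

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} hit_count SEC(".maps");

SEC("kprobe/do_sys_open")	/* hypothetical attach point */
int count_hits(struct pt_regs *ctx)
{
	__u32 key = 0;
	__u64 *val = bpf_map_lookup_elem(&hit_count, &key);

	if (val)
		__sync_fetch_and_add(val, 1);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";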
Peter Zijlstra Aug. 21, 2019, 6:31 p.m. UTC | #8
On Wed, Aug 21, 2019 at 04:54:47PM +0000, Yonghong Song wrote:
> Currently, in kernel/trace/bpf_trace.c, we have
> 
> unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
> {
>          unsigned int ret;
> 
>          if (in_nmi()) /* not supported yet */
>                  return 1;
> 
>          preempt_disable();
> 
>          if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {

Yes, I'm aware of that.

> In the above, events with a bpf program attached will be missed
> if the context is an NMI, or if some recursion happens, even with
> the same or different bpf programs.
> In case of recursion, the events will not be sent to the ring buffer.

And while that is significantly worse than what ftrace/perf have, it is
fundamentally the same thing.

perf allows (and IIRC ftrace does too) 4 nested contexts per CPU
(task, softirq, irq, nmi), but any recursion within those contexts and we
drop stuff.

The BPF stuff is just more eager to drop things on the floor, but it is
fundamentally the same.

> A lot of bpf-based tracing programs use maps to communicate and
> do not allocate a ring buffer at all.

So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
still work fine; but you get to implement it for all software events.

> Maybe we can still use the ioctl-based approach, which is lightweight
> compared to the ring buffer approach? If an fd has bpf attached, nhit/nmissed
> indicates whether the kprobe was processed by the bpf program or not.

There is nothing kprobe specific here. Kprobes just appear to be the
only ones actually accounting for the recursion cases, but everyone has
them.

> Currently, for debugfs, the nhit/nmissed info is exposed at
> {k|u}probe_profile. Alternatively, we could expose nhit/nmissed
> in /proc/self/fdinfo/<fd>. Users could query this interface to
> get the numbers.

No, we're not adding stuff to procfs for this.
Yonghong Song Aug. 21, 2019, 6:43 p.m. UTC | #9
On 8/21/19 11:31 AM, Peter Zijlstra wrote:
> On Wed, Aug 21, 2019 at 04:54:47PM +0000, Yonghong Song wrote:
>> Currently, in kernel/trace/bpf_trace.c, we have
>>
>> unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
>> {
>>           unsigned int ret;
>>
>>           if (in_nmi()) /* not supported yet */
>>                   return 1;
>>
>>           preempt_disable();
>>
>>           if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
> 
> Yes, I'm aware of that.
> 
>> In the above, events with a bpf program attached will be missed
>> if the context is an NMI, or if some recursion happens, even with
>> the same or different bpf programs.
>> In case of recursion, the events will not be sent to the ring buffer.
> 
> And while that is significantly worse than what ftrace/perf have, it is
> fundamentally the same thing.
> 
> perf allows (and IIRC ftrace does too) 4 nested contexts per CPU
> (task, softirq, irq, nmi), but any recursion within those contexts and we
> drop stuff.
> 
> The BPF stuff is just more eager to drop things on the floor, but it is
> fundamentally the same.
> 
>> A lot of bpf-based tracing programs use maps to communicate and
>> do not allocate a ring buffer at all.
> 
> So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
> still work fine; but you get to implement it for all software events.

Could you give more specifics about PERF_FORMAT_LOST? Googling 
"PERF_FORMAT_LOST" only yields two emails which we are discussing here :-(

> 
>> Maybe we can still use the ioctl-based approach, which is lightweight
>> compared to the ring buffer approach? If an fd has bpf attached, nhit/nmissed
>> indicates whether the kprobe was processed by the bpf program or not.
> 
> There is nothing kprobe specific here. Kprobes just appear to be the
> only ones actually accounting for the recursion cases, but everyone has
> them.

Sorry, to be specific: kprobe is just an example. I'm actually referring to
any perf event that bpf can attach to, which theoretically is any perf
event that can be opened with the "perf_event_open" syscall, although some
of them (e.g., software events?) may not have bpf hooks running yet.

> 
>> Currently, for debugfs, the nhit/nmissed info is exposed at
>> {k|u}probe_profile. Alternatively, we could expose nhit/nmissed
>> in /proc/self/fdinfo/<fd>. Users could query this interface to
>> get the numbers.
> 
> No, we're not adding stuff to procfs for this.

No problem. Just a suggestion.
Arnaldo Carvalho de Melo Aug. 21, 2019, 8:04 p.m. UTC | #10
Em Wed, Aug 21, 2019 at 06:43:49PM +0000, Yonghong Song escreveu:
> On 8/21/19 11:31 AM, Peter Zijlstra wrote:
> > On Wed, Aug 21, 2019 at 04:54:47PM +0000, Yonghong Song wrote:
> >> A lot of bpf-based tracing programs use maps to communicate and
> >> do not allocate a ring buffer at all.
> > 
> > So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
> > still work fine; but you get to implement it for all software events.
> 
> Could you give more specifics about PERF_FORMAT_LOST? Googling 
> "PERF_FORMAT_LOST" only yields two emails which we are discussing here :-(

Perhaps he's talking about using read(perf_event_fd, ...) after having set it
up with perf_event_attr.read_format including the to-be-implemented
PERF_FORMAT_LOST bit?

Look at perf_read() and perf_read_one() in kernel/events/core.c.
 
- Arnaldo
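
To make that suggestion concrete, a sketch of what such a read(2) could look
like from user space. PERF_FORMAT_LOST is hypothetical at this point in the
thread, so the flag and the position of the field in the read_format layout
below are assumptions, not an existing ABI; the struct assumes only
PERF_FORMAT_ID plus the proposed bit are requested.

#include <stdint.h>
#include <unistd.h>

struct read_format_with_lost {
	uint64_t value;	/* event count */
	uint64_t id;	/* if PERF_FORMAT_ID is set */
	uint64_t lost;	/* proposed: if PERF_FORMAT_LOST were set */
};

static int read_count_and_lost(int pfd, uint64_t *count, uint64_t *lost)
{
	struct read_format_with_lost rf;

	if (read(pfd, &rf, sizeof(rf)) != sizeof(rf))
		return -1;
	*count = rf.value;
	*lost = rf.lost;
	return 0;
}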
Arnaldo Carvalho de Melo Aug. 21, 2019, 8:07 p.m. UTC | #11
Em Wed, Aug 21, 2019 at 04:54:47PM +0000, Yonghong Song escreveu:
> Arnaldo has a question on the bcc mailing list about hit/miss
> counting for events that the bpf program missed processing.
 
> https://lists.iovisor.org/g/iovisor-dev/message/1783

PERF_FORMAT_LOST seems to be a good answer to that? See my other reply
to this thread.

- Arnaldo
Yonghong Song Aug. 21, 2019, 10:10 p.m. UTC | #12
On 8/21/19 1:07 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Aug 21, 2019 at 04:54:47PM +0000, Yonghong Song escreveu:
>> Arnaldo has a question on the bcc mailing list about hit/miss
>> counting for events that the bpf program missed processing.
>   
>> https://lists.iovisor.org/g/iovisor-dev/message/1783
> 
> PERF_FORMAT_LOST seems to be a good answer to that? See my other reply
> to this thread.

Just checked. Indeed, adding PERF_FORMAT_LOST to the perf read_format
seems a reasonable approach. An ioctl on the perf_event_open fd could do
the same thing, but an ioctl should be avoided if we have alternatives.

Thanks for the pointer!

> 
> - Arnaldo
>
Peter Zijlstra Aug. 22, 2019, 7:47 a.m. UTC | #13
On Wed, Aug 21, 2019 at 06:43:49PM +0000, Yonghong Song wrote:
> On 8/21/19 11:31 AM, Peter Zijlstra wrote:

> > So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
> > still work fine; but you get to implement it for all software events.
> 
> Could you give more specifics about PERF_FORMAT_LOST? Googling 
> "PERF_FORMAT_LOST" only yields two emails which we are discussing here :-(

Look at what the other PERF_FORMAT_ flags do? Basically it is adding a
field to the read(2) output.

> >> Maybe we can still use the ioctl-based approach, which is lightweight
> >> compared to the ring buffer approach? If an fd has bpf attached, nhit/nmissed
> >> indicates whether the kprobe was processed by the bpf program or not.
> > 
> > There is nothing kprobe specific here. Kprobes just appear to be the
> > only ones actually accounting for the recursion cases, but everyone has
> > them.
> 
> Sorry, to be specific: kprobe is just an example. I'm actually referring to
> any perf event that bpf can attach to, which theoretically is any perf
> event that can be opened with the "perf_event_open" syscall, although some
> of them (e.g., software events?) may not have bpf hooks running yet.

Yes, BPF is sucky that way.
Song Liu Aug. 22, 2019, 7:54 a.m. UTC | #14
Hi Peter, 

> On Aug 22, 2019, at 12:47 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> 
> On Wed, Aug 21, 2019 at 06:43:49PM +0000, Yonghong Song wrote:
>> On 8/21/19 11:31 AM, Peter Zijlstra wrote:
> 
>>> So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
>>> still work fine; but you get to implement it for all software events.
>> 
>> Could you give more specifics about PERF_FORMAT_LOST? Googling 
>> "PERF_FORMAT_LOST" only yields two emails which we are discussing here :-(
> 
> Look at what the other PERF_FORMAT_ flags do? Basically it is adding a
> field to the read(2) output.

Do we need to implement PERF_FORMAT_LOST for all software events? If user
space asks for PERF_FORMAT_LOST for events that do not support it, can we
just fail sys_perf_event_open()?

Thanks,
Song
Peter Zijlstra Aug. 22, 2019, 9:05 a.m. UTC | #15
On Thu, Aug 22, 2019 at 07:54:16AM +0000, Song Liu wrote:
> Hi Peter, 
> 
> > On Aug 22, 2019, at 12:47 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > On Wed, Aug 21, 2019 at 06:43:49PM +0000, Yonghong Song wrote:
> >> On 8/21/19 11:31 AM, Peter Zijlstra wrote:
> > 
> >>> So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
> >>> still work fine; but you get to implement it for all software events.
> >> 
> >> Could you give more specifics about PERF_FORMAT_LOST? Googling 
> >> "PERF_FORMAT_LOST" only yields two emails which we are discussing here :-(
> > 
> > Look at what the other PERF_FORMAT_ flags do? Basically it is adding a
> > field to the read(2) output.
> 
> Do we need to implement PERF_FORMAT_LOST for all software events? If user
> space asks for PERF_FORMAT_LOST for events that do not support it, can we
> just fail sys_perf_event_open()?

It really shouldn't be hard; and I'm failing to see why kprobes are
special.
Daniel Xu Aug. 22, 2019, 9:08 p.m. UTC | #16
On Thu Aug 22, 2019 at 11:05 AM Peter Zijlstra wrote:
> On Thu, Aug 22, 2019 at 07:54:16AM +0000, Song Liu wrote:
> > Hi Peter, 
> > 
> > > On Aug 22, 2019, at 12:47 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > > 
> > > On Wed, Aug 21, 2019 at 06:43:49PM +0000, Yonghong Song wrote:
> > >> On 8/21/19 11:31 AM, Peter Zijlstra wrote:
> > > 
> > >>> So extending PERF_RECORD_LOST doesn't work. But PERF_FORMAT_LOST might
> > >>> still work fine; but you get to implement it for all software events.
> > >> 
> > >> Could you give more specifics about PERF_FORMAT_LOST? Googling 
> > >> "PERF_FORMAT_LOST" only yields two emails which we are discussing here :-(
> > > 
> > > Look at what the other PERF_FORMAT_ flags do? Basically it is adding a
> > > field to the read(2) output.
> > 
> > Do we need to implement PERF_FORMAT_LOST for all software events? If user
> > space asks for PERF_FORMAT_LOST for events that do not support it, can we
> > just fail sys_perf_event_open()?
> 
> It really shouldn't be hard; and I'm failing to see why kprobes are
> special.

Thanks for the feedback, everyone. Really appreciate it.

I will look into extending read_format. I'll submit another patch series
after I get the code to work.

Daniel

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 5150436783e8..61558f19696a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -586,6 +586,12 @@  extern int bpf_get_kprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **symbol,
 			       u64 *probe_offset, u64 *probe_addr,
 			       bool perf_type_tracepoint);
+extern int perf_kprobe_event_query(struct perf_event *event, void __user *info);
+#else
+int perf_kprobe_event_query(struct perf_event *event, void __user *info)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 extern int  perf_uprobe_init(struct perf_event *event,
@@ -594,6 +600,12 @@  extern void perf_uprobe_destroy(struct perf_event *event);
 extern int bpf_get_uprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **filename,
 			       u64 *probe_offset, bool perf_type_tracepoint);
+extern int perf_uprobe_event_query(struct perf_event *event, void __user *info);
+#else
+int perf_uprobe_event_query(struct perf_event *event, void __user *info)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..8783d29a807a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -447,6 +447,28 @@  struct perf_event_query_bpf {
 	__u32	ids[0];
 };
 
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_PROBE command
+ * to query information about the probe attached to the perf
+ * event. Currently only supports [uk]probes.
+ */
+struct perf_event_query_probe {
+	/*
+	 * Size of structure for forward/backward compatibility
+	 */
+	__u64	size;
+	/*
+	 * Set by the kernel to indicate number of times this probe
+	 * was temporarily disabled
+	 */
+	__u64	nmissed;
+	/*
+	 * Set by the kernel to indicate number of times this probe
+	 * was hit
+	 */
+	__u64	nhit;
+};
+
 /*
  * Ioctls that can be done on a perf event fd:
  */
@@ -462,6 +484,7 @@  struct perf_event_query_bpf {
 #define PERF_EVENT_IOC_PAUSE_OUTPUT		_IOW('$', 9, __u32)
 #define PERF_EVENT_IOC_QUERY_BPF		_IOWR('$', 10, struct perf_event_query_bpf *)
 #define PERF_EVENT_IOC_MODIFY_ATTRIBUTES	_IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_QUERY_PROBE		_IOR('$', 12, struct perf_event_query_probe *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..ed33d50511a3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5060,6 +5060,8 @@  static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
 static int perf_copy_attr(struct perf_event_attr __user *uattr,
 			  struct perf_event_attr *attr);
+static int perf_probe_event_query(struct perf_event *event,
+				    void __user *info);
 
 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
@@ -5143,6 +5145,10 @@  static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 
 		return perf_event_modify_attr(event,  &new_attr);
 	}
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
+	case PERF_EVENT_IOC_QUERY_PROBE:
+		return perf_probe_event_query(event, (void __user *)arg);
+#endif
 	default:
 		return -ENOTTY;
 	}
@@ -8833,6 +8839,20 @@  static inline void perf_tp_register(void)
 #endif
 }
 
+static int perf_probe_event_query(struct perf_event *event,
+				    void __user *info)
+{
+#ifdef CONFIG_KPROBE_EVENTS
+	if (event->attr.type == perf_kprobe.type)
+		return perf_kprobe_event_query(event, (void __user *)info);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+	if (event->attr.type == perf_uprobe.type)
+		return perf_uprobe_event_query(event, (void __user *)info);
+#endif
+	return -EINVAL;
+}
+
 static void perf_event_free_filter(struct perf_event *event)
 {
 	ftrace_profile_free_filter(event);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9d483ad9bb6c..89fbe3e97562 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -196,6 +196,30 @@  bool trace_kprobe_error_injectable(struct trace_event_call *call)
 	return within_error_injection_list(trace_kprobe_address(tk));
 }
 
+int perf_kprobe_event_query(struct perf_event *event, void __user *info)
+{
+	struct perf_event_query_probe __user *uquery = info;
+	struct perf_event_query_probe query = {};
+	struct trace_event_call *call = event->tp_event;
+	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+	u64 ncopy;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user(&query, uquery,
+			   offsetofend(struct perf_event_query_probe, size)))
+		return -EFAULT;
+
+	ncopy = min_t(u64, query.size, sizeof(query));
+	query.nhit = trace_kprobe_nhit(tk);
+	query.nmissed = tk->rp.kp.nmissed;
+
+	if (copy_to_user(uquery, &query, ncopy))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int register_kprobe_event(struct trace_kprobe *tk);
 static int unregister_kprobe_event(struct trace_kprobe *tk);
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 1ceedb9146b1..ecdf2bdb91a7 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1333,6 +1333,30 @@  static inline void init_trace_event_call(struct trace_uprobe *tu)
 	call->data = tu;
 }
 
+int perf_uprobe_event_query(struct perf_event *event, void __user *info)
+{
+	struct perf_event_query_probe __user *uquery = info;
+	struct perf_event_query_probe query = {};
+	struct trace_event_call *call = event->tp_event;
+	struct trace_uprobe *tu = (struct trace_uprobe *)call->data;
+	u64 ncopy;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user(&query, uquery,
+			   offsetofend(struct perf_event_query_probe, size)))
+		return -EFAULT;
+
+	ncopy = min_t(u64, query.size, sizeof(query));
+	query.nhit = tu->nhit;
+	query.nmissed = 0;
+
+	if (copy_to_user(uquery, &query, ncopy))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int register_uprobe_event(struct trace_uprobe *tu)
 {
 	init_trace_event_call(tu);