[v3,01/14] perf/hw_breakpoint: Add KUnit test for constraints accounting

Message ID 20220704150514.48816-2-elver@google.com (mailing list archive)
State Handled Elsewhere
Series perf/hw_breakpoint: Optimize for thousands of tasks

Commit Message

Marco Elver July 4, 2022, 3:05 p.m. UTC
Add KUnit test for hw_breakpoint constraints accounting, with various
interesting mixes of breakpoint targets (some care was taken to catch
interesting corner cases via bug-injection).

The test cannot be built as a module because it requires access to
hw_breakpoint_slots(), which is not inlinable or exported on all
architectures.

Signed-off-by: Marco Elver <elver@google.com>
---
v3:
* Don't use raw_smp_processor_id().

v2:
* New patch.
---
 kernel/events/Makefile             |   1 +
 kernel/events/hw_breakpoint_test.c | 323 +++++++++++++++++++++++++++++
 lib/Kconfig.debug                  |  10 +
 3 files changed, 334 insertions(+)
 create mode 100644 kernel/events/hw_breakpoint_test.c
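
For reference, a minimal sketch of how to build and run this test (an illustration, not part of the patch: kunit.py flags vary between kernel versions, and the test needs a non-UML architecture with HAVE_HW_BREAKPOINT, e.g. x86_64). With a .kunitconfig containing:

        CONFIG_KUNIT=y
        CONFIG_HW_BREAKPOINT_KUNIT_TEST=y

the suite can then be run under QEMU via:

        ./tools/testing/kunit/kunit.py run --arch=x86_64 'hw_breakpoint'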

Comments

Dmitry Vyukov July 4, 2022, 3:10 p.m. UTC | #1
On Mon, 4 Jul 2022 at 17:06, Marco Elver <elver@google.com> wrote:
>
> Add KUnit test for hw_breakpoint constraints accounting, with various
> interesting mixes of breakpoint targets (some care was taken to catch
> interesting corner cases via bug-injection).
>
> The test cannot be built as a module because it requires access to
> hw_breakpoint_slots(), which is not inlinable or exported on all
> architectures.
>
> Signed-off-by: Marco Elver <elver@google.com>

Reviewed-by: Dmitry Vyukov <dvyukov@google.com>

> ---
> v3:
> * Don't use raw_smp_processor_id().
>
> v2:
> * New patch.
> ---
>  kernel/events/Makefile             |   1 +
>  kernel/events/hw_breakpoint_test.c | 323 +++++++++++++++++++++++++++++
>  lib/Kconfig.debug                  |  10 +
>  3 files changed, 334 insertions(+)
>  create mode 100644 kernel/events/hw_breakpoint_test.c
>
> diff --git a/kernel/events/Makefile b/kernel/events/Makefile
> index 8591c180b52b..91a62f566743 100644
> --- a/kernel/events/Makefile
> +++ b/kernel/events/Makefile
> @@ -2,4 +2,5 @@
>  obj-y := core.o ring_buffer.o callchain.o
>
>  obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
> +obj-$(CONFIG_HW_BREAKPOINT_KUNIT_TEST) += hw_breakpoint_test.o
>  obj-$(CONFIG_UPROBES) += uprobes.o
> diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c
> new file mode 100644
> index 000000000000..433c5c45e2a5
> --- /dev/null
> +++ b/kernel/events/hw_breakpoint_test.c
> @@ -0,0 +1,323 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KUnit test for hw_breakpoint constraints accounting logic.
> + *
> + * Copyright (C) 2022, Google LLC.
> + */
> +
> +#include <kunit/test.h>
> +#include <linux/cpumask.h>
> +#include <linux/hw_breakpoint.h>
> +#include <linux/kthread.h>
> +#include <linux/perf_event.h>
> +#include <asm/hw_breakpoint.h>
> +
> +#define TEST_REQUIRES_BP_SLOTS(test, slots)                                            \
> +       do {                                                                            \
> +               if ((slots) > get_test_bp_slots()) {                                    \
> +                       kunit_skip((test), "Requires breakpoint slots: %d > %d", slots, \
> +                                  get_test_bp_slots());                                \
> +               }                                                                       \
> +       } while (0)
> +
> +#define TEST_EXPECT_NOSPC(expr) KUNIT_EXPECT_EQ(test, -ENOSPC, PTR_ERR(expr))
> +
> +#define MAX_TEST_BREAKPOINTS 512
> +
> +static char break_vars[MAX_TEST_BREAKPOINTS];
> +static struct perf_event *test_bps[MAX_TEST_BREAKPOINTS];
> +static struct task_struct *__other_task;
> +
> +static struct perf_event *register_test_bp(int cpu, struct task_struct *tsk, int idx)
> +{
> +       struct perf_event_attr attr = {};
> +
> +       if (WARN_ON(idx < 0 || idx >= MAX_TEST_BREAKPOINTS))
> +               return NULL;
> +
> +       hw_breakpoint_init(&attr);
> +       attr.bp_addr = (unsigned long)&break_vars[idx];
> +       attr.bp_len = HW_BREAKPOINT_LEN_1;
> +       attr.bp_type = HW_BREAKPOINT_RW;
> +       return perf_event_create_kernel_counter(&attr, cpu, tsk, NULL, NULL);
> +}
> +
> +static void unregister_test_bp(struct perf_event **bp)
> +{
> +       if (WARN_ON(IS_ERR(*bp)))
> +               return;
> +       if (WARN_ON(!*bp))
> +               return;
> +       unregister_hw_breakpoint(*bp);
> +       *bp = NULL;
> +}
> +
> +static int get_test_bp_slots(void)
> +{
> +       static int slots;
> +
> +       if (!slots)
> +               slots = hw_breakpoint_slots(TYPE_DATA);
> +
> +       return slots;
> +}
> +
> +static void fill_one_bp_slot(struct kunit *test, int *id, int cpu, struct task_struct *tsk)
> +{
> +       struct perf_event *bp = register_test_bp(cpu, tsk, *id);
> +
> +       KUNIT_ASSERT_NOT_NULL(test, bp);
> +       KUNIT_ASSERT_FALSE(test, IS_ERR(bp));
> +       KUNIT_ASSERT_NULL(test, test_bps[*id]);
> +       test_bps[(*id)++] = bp;
> +}
> +
> +/*
> + * Fills up the given @cpu/@tsk with breakpoints, leaving only @skip slots free.
> + *
> + * Returns true if this can be called again, continuing at @id.
> + */
> +static bool fill_bp_slots(struct kunit *test, int *id, int cpu, struct task_struct *tsk, int skip)
> +{
> +       for (int i = 0; i < get_test_bp_slots() - skip; ++i)
> +               fill_one_bp_slot(test, id, cpu, tsk);
> +
> +       return *id + get_test_bp_slots() <= MAX_TEST_BREAKPOINTS;
> +}
> +
> +static int dummy_kthread(void *arg)
> +{
> +       return 0;
> +}
> +
> +static struct task_struct *get_other_task(struct kunit *test)
> +{
> +       struct task_struct *tsk;
> +
> +       if (__other_task)
> +               return __other_task;
> +
> +       tsk = kthread_create(dummy_kthread, NULL, "hw_breakpoint_dummy_task");
> +       KUNIT_ASSERT_FALSE(test, IS_ERR(tsk));
> +       __other_task = tsk;
> +       return __other_task;
> +}
> +
> +static int get_test_cpu(int num)
> +{
> +       int cpu;
> +
> +       WARN_ON(num < 0);
> +
> +       for_each_online_cpu(cpu) {
> +               if (num-- <= 0)
> +                       break;
> +       }
> +
> +       return cpu;
> +}
> +
> +/* ===== Test cases ===== */
> +
> +static void test_one_cpu(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       fill_bp_slots(test, &idx, get_test_cpu(0), NULL, 0);
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +}
> +
> +static void test_many_cpus(struct kunit *test)
> +{
> +       int idx = 0;
> +       int cpu;
> +
> +       /* Test that CPUs are independent. */
> +       for_each_online_cpu(cpu) {
> +               bool do_continue = fill_bp_slots(test, &idx, cpu, NULL, 0);
> +
> +               TEST_EXPECT_NOSPC(register_test_bp(cpu, NULL, idx));
> +               if (!do_continue)
> +                       break;
> +       }
> +}
> +
> +static void test_one_task_on_all_cpus(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       fill_bp_slots(test, &idx, -1, current, 0);
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +	/* Removing one and adding back a CPU-target breakpoint should work. */
> +       unregister_test_bp(&test_bps[0]);
> +       fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
> +}
> +
> +static void test_two_tasks_on_all_cpus(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       /* Test that tasks are independent. */
> +       fill_bp_slots(test, &idx, -1, current, 0);
> +       fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
> +
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +	/* Removing one from the first task and adding back a CPU-target breakpoint should not work. */
> +       unregister_test_bp(&test_bps[0]);
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +}
> +
> +static void test_one_task_on_one_cpu(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +       /*
> +	 * Removing one and adding back a CPU-target breakpoint should work;
> +	 * this case is special vs. the above because the task's constraints are CPU-dependent.
> +        */
> +       unregister_test_bp(&test_bps[0]);
> +       fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
> +}
> +
> +static void test_one_task_mixed(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       TEST_REQUIRES_BP_SLOTS(test, 3);
> +
> +       fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
> +       fill_bp_slots(test, &idx, -1, current, 1);
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +
> +       /* Transition from CPU-dependent pinned count to CPU-independent. */
> +       unregister_test_bp(&test_bps[0]);
> +       unregister_test_bp(&test_bps[1]);
> +       fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
> +       fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +}
> +
> +static void test_two_tasks_on_one_cpu(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
> +       fill_bp_slots(test, &idx, get_test_cpu(0), get_other_task(test), 0);
> +
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +       /* Can still create breakpoints on some other CPU. */
> +       fill_bp_slots(test, &idx, get_test_cpu(1), NULL, 0);
> +}
> +
> +static void test_two_tasks_on_one_all_cpus(struct kunit *test)
> +{
> +       int idx = 0;
> +
> +       fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
> +       fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
> +
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +       /* Cannot create breakpoints on some other CPU either. */
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
> +}
> +
> +static void test_task_on_all_and_one_cpu(struct kunit *test)
> +{
> +       int tsk_on_cpu_idx, cpu_idx;
> +       int idx = 0;
> +
> +       TEST_REQUIRES_BP_SLOTS(test, 3);
> +
> +       fill_bp_slots(test, &idx, -1, current, 2);
> +       /* Transitioning from only all CPU breakpoints to mixed. */
> +       tsk_on_cpu_idx = idx;
> +       fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
> +       fill_one_bp_slot(test, &idx, -1, current);
> +
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +
> +       /* We should still be able to use up another CPU's slots. */
> +       cpu_idx = idx;
> +       fill_one_bp_slot(test, &idx, get_test_cpu(1), NULL);
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
> +
> +       /* Transitioning back to task target on all CPUs. */
> +       unregister_test_bp(&test_bps[tsk_on_cpu_idx]);
> +       /* Still have a CPU target breakpoint in get_test_cpu(1). */
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       /* Remove it and try again. */
> +       unregister_test_bp(&test_bps[cpu_idx]);
> +       fill_one_bp_slot(test, &idx, -1, current);
> +
> +       TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
> +       TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
> +}
> +
> +static struct kunit_case hw_breakpoint_test_cases[] = {
> +       KUNIT_CASE(test_one_cpu),
> +       KUNIT_CASE(test_many_cpus),
> +       KUNIT_CASE(test_one_task_on_all_cpus),
> +       KUNIT_CASE(test_two_tasks_on_all_cpus),
> +       KUNIT_CASE(test_one_task_on_one_cpu),
> +       KUNIT_CASE(test_one_task_mixed),
> +       KUNIT_CASE(test_two_tasks_on_one_cpu),
> +       KUNIT_CASE(test_two_tasks_on_one_all_cpus),
> +       KUNIT_CASE(test_task_on_all_and_one_cpu),
> +       {},
> +};
> +
> +static int test_init(struct kunit *test)
> +{
> +       /* Most test cases want 2 distinct CPUs. */
> +       return num_online_cpus() < 2 ? -EINVAL : 0;
> +}
> +
> +static void test_exit(struct kunit *test)
> +{
> +       for (int i = 0; i < MAX_TEST_BREAKPOINTS; ++i) {
> +               if (test_bps[i])
> +                       unregister_test_bp(&test_bps[i]);
> +       }
> +
> +       if (__other_task) {
> +               kthread_stop(__other_task);
> +               __other_task = NULL;
> +       }
> +}
> +
> +static struct kunit_suite hw_breakpoint_test_suite = {
> +       .name = "hw_breakpoint",
> +       .test_cases = hw_breakpoint_test_cases,
> +       .init = test_init,
> +       .exit = test_exit,
> +};
> +
> +kunit_test_suites(&hw_breakpoint_test_suite);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Marco Elver <elver@google.com>");
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 2e24db4bff19..4c87a6edf046 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2513,6 +2513,16 @@ config STACKINIT_KUNIT_TEST
>           CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF,
>           or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL.
>
> +config HW_BREAKPOINT_KUNIT_TEST
> +       bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS
> +       depends on HAVE_HW_BREAKPOINT
> +       depends on KUNIT=y
> +       default KUNIT_ALL_TESTS
> +       help
> +         Tests for hw_breakpoint constraints accounting.
> +
> +         If unsure, say N.
> +
>  config TEST_UDELAY
>         tristate "udelay test driver"
>         help
> --
> 2.37.0.rc0.161.g10f37bed90-goog
>
Ian Rogers July 20, 2022, 3:22 p.m. UTC | #2
On Mon, Jul 4, 2022 at 8:11 AM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Mon, 4 Jul 2022 at 17:06, Marco Elver <elver@google.com> wrote:
> >
> > Add KUnit test for hw_breakpoint constraints accounting, with various
> > interesting mixes of breakpoint targets (some care was taken to catch
> > interesting corner cases via bug-injection).
> >
> > The test cannot be built as a module because it requires access to
> > hw_breakpoint_slots(), which is not inlinable or exported on all
> > architectures.
> >
> > Signed-off-by: Marco Elver <elver@google.com>
>
> Reviewed-by: Dmitry Vyukov <dvyukov@google.com>

Acked-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

Mark Rutland July 21, 2022, 4:22 p.m. UTC | #3
Hi Marco,

[adding Will]

On Mon, Jul 04, 2022 at 05:05:01PM +0200, Marco Elver wrote:
> Add KUnit test for hw_breakpoint constraints accounting, with various
> interesting mixes of breakpoint targets (some care was taken to catch
> interesting corner cases via bug-injection).
> 
> The test cannot be built as a module because it requires access to
> hw_breakpoint_slots(), which is not inlinable or exported on all
> architectures.
> 
> Signed-off-by: Marco Elver <elver@google.com>

As mentioned on IRC, I'm seeing these tests fail on arm64 when applied atop
v5.19-rc7:

| TAP version 14
| 1..1
|     # Subtest: hw_breakpoint
|     1..9
|     ok 1 - test_one_cpu
|     ok 2 - test_many_cpus
|     # test_one_task_on_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 3 - test_one_task_on_all_cpus
|     # test_two_tasks_on_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 4 - test_two_tasks_on_all_cpus
|     # test_one_task_on_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 5 - test_one_task_on_one_cpu
|     # test_one_task_mixed: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 6 - test_one_task_mixed
|     # test_two_tasks_on_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 7 - test_two_tasks_on_one_cpu
|     # test_two_tasks_on_one_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 8 - test_two_tasks_on_one_all_cpus
|     # test_task_on_all_and_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
|     Expected IS_ERR(bp) to be false, but is true
|     not ok 9 - test_task_on_all_and_one_cpu
| # hw_breakpoint: pass:2 fail:7 skip:0 total:9
| # Totals: pass:2 fail:7 skip:0 total:9

... which seems to be because arm64 currently forbids per-task
breakpoints/watchpoints in hw_breakpoint_arch_parse(), where we have:

        /*
         * Disallow per-task kernel breakpoints since these would
         * complicate the stepping code.
         */
        if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
                return -EINVAL;

... which has been the case since day one in commit:

  478fcb2cdb2351dc ("arm64: Debugging support")

I'm not immediately sure what would be necessary to support per-task kernel
breakpoints, but given a lot of that state is currently per-cpu, I imagine it's
invasive.
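
In the meantime, the test itself could probe for per-task breakpoint
support up front and skip the affected cases instead of failing,
mirroring the existing TEST_REQUIRES_BP_SLOTS() pattern. A rough sketch
(not part of this patch; can_use_task_bp() and TEST_REQUIRES_TASK_BP()
are made-up names, and this assumes the patch's helpers are in scope):

        /* Probe once whether a per-task breakpoint can be registered. */
        static bool can_use_task_bp(void)
        {
                struct perf_event *bp = register_test_bp(-1, current, 0);

                if (IS_ERR(bp))
                        return false;
                unregister_hw_breakpoint(bp);
                return true;
        }

        #define TEST_REQUIRES_TASK_BP(test)                             \
                do {                                                    \
                        if (!can_use_task_bp())                         \
                                kunit_skip((test), "Requires per-task breakpoints"); \
                } while (0)

Each task-targeting case (e.g. test_one_task_on_all_cpus()) would then
invoke TEST_REQUIRES_TASK_BP(test) before filling any slots.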

Mark.

Will Deacon July 22, 2022, 9:10 a.m. UTC | #4
On Thu, Jul 21, 2022 at 05:22:07PM +0100, Mark Rutland wrote:
> Hi Marco,
> 
> [adding Will]
> 
> On Mon, Jul 04, 2022 at 05:05:01PM +0200, Marco Elver wrote:
> > [...]
> 
> As mentioned on IRC, I'm seeing these tests fail on arm64 when applied atop
> v5.19-rc7:
> 
> [...]
> 
> ... which seems to be because arm64 currently forbids per-task
> breakpoints/watchpoints in hw_breakpoint_arch_parse(), where we have:
> 
>         /*
>          * Disallow per-task kernel breakpoints since these would
>          * complicate the stepping code.
>          */
>         if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
>                 return -EINVAL;
> 
> ... which has been the case since day one in commit:
> 
>   478fcb2cdb2351dc ("arm64: Debugging support")
> 
> I'm not immediately sure what would be necessary to support per-task kernel
> breakpoints, but given a lot of that state is currently per-cpu, I imagine it's
> invasive.

I would actually like to remove HW_BREAKPOINT completely for arm64 as it
doesn't really work and causes problems for other interfaces such as ptrace
and kgdb.

Will
Dmitry Vyukov July 22, 2022, 9:20 a.m. UTC | #5
On Fri, 22 Jul 2022 at 11:10, Will Deacon <will@kernel.org> wrote:
> > [...]
>
> I would actually like to remove HW_BREAKPOINT completely for arm64 as it
> doesn't really work and causes problems for other interfaces such as ptrace
> and kgdb.

Will it be a localized removal of code that will be easy to revert in
the future? Or will it touch lots of code here and there?
Let's say we come up with a very important use case for HW_BREAKPOINT
and need to make it work on arm64 as well in the future.
Will Deacon July 22, 2022, 10:10 a.m. UTC | #6
On Fri, Jul 22, 2022 at 11:20:25AM +0200, Dmitry Vyukov wrote:
> On Fri, 22 Jul 2022 at 11:10, Will Deacon <will@kernel.org> wrote:
> > > [...]
> >
> > I would actually like to remove HW_BREAKPOINT completely for arm64 as it
> > doesn't really work and causes problems for other interfaces such as ptrace
> > and kgdb.
> 
> Will it be a localized removal of code that will be easy to revert in
> the future? Or will it touch lots of code here and there?
> Let's say we come up with a very important use case for HW_BREAKPOINT
> and need to make it work on arm64 as well in the future.

My (rough) plan is to implement a lower-level abstraction for handling the
underlying hardware resources, so we can layer consumers on top of that
instead of funneling through hw_breakpoint. So if we figure out how to make
bits of hw_breakpoint work on arm64, then it should just go on top.

The main pain point for hw_breakpoint is kernel-side {break,watch}points
and I think there are open design questions about how they should work
on arm64, particularly when considering the interaction with user
watchpoints triggering on uaccess routines and the possibility of hitting
a kernel watchpoint in irq context.
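
To make the uaccess interaction concrete, here is a userspace sketch
(illustrative only, not from this series; it needs root or a lowered
perf_event_paranoid, and whether the kernel-mode access fires is exactly
the architecture-specific question: on x86 the kernel's copy_to_user()
write into the watched buffer is typically counted, since exclude_kernel
is left clear):

        #include <linux/hw_breakpoint.h>
        #include <linux/perf_event.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static char buf[1];     /* the watched user address */

        int main(void)
        {
                struct perf_event_attr attr;
                long long count = 0;
                int fd, pipefd[2];

                memset(&attr, 0, sizeof(attr));
                attr.type = PERF_TYPE_BREAKPOINT;
                attr.size = sizeof(attr);
                attr.bp_type = HW_BREAKPOINT_W;
                attr.bp_addr = (unsigned long)buf;
                attr.bp_len = HW_BREAKPOINT_LEN_1;

                fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
                if (fd < 0 || pipe(pipefd))
                        return 1;

                /* The kernel, not userspace, writes buf via copy_to_user(). */
                write(pipefd[1], "x", 1);
                read(pipefd[0], buf, 1);

                read(fd, &count, sizeof(count));
                printf("watchpoint fired %lld times\n", count);
                return 0;
        }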

Will
Dmitry Vyukov July 22, 2022, 10:31 a.m. UTC | #7
On Fri, 22 Jul 2022 at 12:11, Will Deacon <will@kernel.org> wrote:
> > > > [adding Will]
> > > >
> > > > On Mon, Jul 04, 2022 at 05:05:01PM +0200, Marco Elver wrote:
> > > > > Add KUnit test for hw_breakpoint constraints accounting, with various
> > > > > interesting mixes of breakpoint targets (some care was taken to catch
> > > > > interesting corner cases via bug-injection).
> > > > >
> > > > > The test cannot be built as a module because it requires access to
> > > > > hw_breakpoint_slots(), which is not inlinable or exported on all
> > > > > architectures.
> > > > >
> > > > > Signed-off-by: Marco Elver <elver@google.com>
> > > >
> > > > As mentioned on IRC, I'm seeing these tests fail on arm64 when applied atop
> > > > v5.19-rc7:
> > > >
> > > > | TAP version 14
> > > > | 1..1
> > > > |     # Subtest: hw_breakpoint
> > > > |     1..9
> > > > |     ok 1 - test_one_cpu
> > > > |     ok 2 - test_many_cpus
> > > > |     # test_one_task_on_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 3 - test_one_task_on_all_cpus
> > > > |     # test_two_tasks_on_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 4 - test_two_tasks_on_all_cpus
> > > > |     # test_one_task_on_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 5 - test_one_task_on_one_cpu
> > > > |     # test_one_task_mixed: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 6 - test_one_task_mixed
> > > > |     # test_two_tasks_on_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 7 - test_two_tasks_on_one_cpu
> > > > |     # test_two_tasks_on_one_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 8 - test_two_tasks_on_one_all_cpus
> > > > |     # test_task_on_all_and_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> > > > |     Expected IS_ERR(bp) to be false, but is true
> > > > |     not ok 9 - test_task_on_all_and_one_cpu
> > > > | # hw_breakpoint: pass:2 fail:7 skip:0 total:9
> > > > | # Totals: pass:2 fail:7 skip:0 total:9
> > > >
> > > > ... which seems to be because arm64 currently forbids per-task
> > > > breakpoints/watchpoints in hw_breakpoint_arch_parse(), where we have:
> > > >
> > > >         /*
> > > >          * Disallow per-task kernel breakpoints since these would
> > > >          * complicate the stepping code.
> > > >          */
> > > >         if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
> > > >                 return -EINVAL;
> > > >
> > > > ... which has been the case since day one in commit:
> > > >
> > > >   478fcb2cdb2351dc ("arm64: Debugging support")
> > > >
> > > > I'm not immediately sure what would be necessary to support per-task kernel
> > > > breakpoints, but given a lot of that state is currently per-cpu, I imagine it's
> > > > invasive.
> > >
> > > I would actually like to remove HW_BREAKPOINT completely for arm64 as it
> > > doesn't really work and causes problems for other interfaces such as ptrace
> > > and kgdb.
> >
> > Will it be a localized removal of code that will be easy to revert in
> > future? Or will it touch lots of code here and there?
> > Let's say we come up with a very important use case for HW_BREAKPOINT
> > and will need to make it work on arm64 as well in future.
>
> My (rough) plan is to implement a lower-level abstraction for handling the
> underlying hardware resources, so we can layer consumers on top of that
> instead of funneling through hw_breakpoint. So if we figure out how to make
> bits of hw_breakpoint work on arm64, then it should just go on top.
>
> The main pain point for hw_breakpoint is kernel-side {break,watch}points
> and I think there are open design questions about how they should work
> on arm64, particularly when considering the interaction with user
> watchpoints triggering on uaccess routines and the possibility of hitting
> a kernel watchpoint in irq context.

I see. Our main interest would be break/watchpoints on user addresses
firing from both user space and the kernel (uaccess), so at least in irq
context.
Will Deacon July 22, 2022, 11:03 a.m. UTC | #8
On Fri, Jul 22, 2022 at 12:31:45PM +0200, Dmitry Vyukov wrote:
> On Fri, 22 Jul 2022 at 12:11, Will Deacon <will@kernel.org> wrote:
> > > > > On Mon, Jul 04, 2022 at 05:05:01PM +0200, Marco Elver wrote:
> > > > > I'm not immediately sure what would be necessary to support per-task kernel
> > > > > breakpoints, but given a lot of that state is currently per-cpu, I imagine it's
> > > > > invasive.
> > > >
> > > > I would actually like to remove HW_BREAKPOINT completely for arm64 as it
> > > > doesn't really work and causes problems for other interfaces such as ptrace
> > > > and kgdb.
> > >
> > > Will it be a localized removal of code that will be easy to revert in
> > > future? Or will it touch lots of code here and there?
> > > Let's say we come up with a very important use case for HW_BREAKPOINT
> > > and will need to make it work on arm64 as well in future.
> >
> > My (rough) plan is to implement a lower-level abstraction for handling the
> > underlying hardware resources, so we can layer consumers on top of that
> > instead of funneling through hw_breakpoint. So if we figure out how to make
> > bits of hw_breakpoint work on arm64, then it should just go on top.
> >
> > The main pain point for hw_breakpoint is kernel-side {break,watch}points
> > and I think there are open design questions about how they should work
> > on arm64, particularly when considering the interaction with user
> > watchpoints triggering on uaccess routines and the possibility of hitting
> > a kernel watchpoint in irq context.
> 
> I see. Our main interest would be break/watchpoints on user addresses
> firing from both user space and the kernel (uaccess), so at least in irq
> context.

Interesting. Do other architectures report watchpoint hits on user
addresses from kernel uaccess? It feels like this might be surprising to
some users, and it opens up questions about accesses using different virtual
aliases (e.g. via GUP) or from other entities as well (e.g. firmware,
KVM guests, DMA).

Will
Dmitry Vyukov July 22, 2022, 1:41 p.m. UTC | #9
On Fri, 22 Jul 2022 at 13:03, Will Deacon <will@kernel.org> wrote:
> > > > > > On Mon, Jul 04, 2022 at 05:05:01PM +0200, Marco Elver wrote:
> > > > > > I'm not immediately sure what would be necessary to support per-task kernel
> > > > > > breakpoints, but given a lot of that state is currently per-cpu, I imagine it's
> > > > > > invasive.
> > > > >
> > > > > I would actually like to remove HW_BREAKPOINT completely for arm64 as it
> > > > > doesn't really work and causes problems for other interfaces such as ptrace
> > > > > and kgdb.
> > > >
> > > > Will it be a localized removal of code that will be easy to revert in
> > > > future? Or will it touch lots of code here and there?
> > > > Let's say we come up with a very important use case for HW_BREAKPOINT
> > > > and will need to make it work on arm64 as well in future.
> > >
> > > My (rough) plan is to implement a lower-level abstraction for handling the
> > > underlying hardware resources, so we can layer consumers on top of that
> > > instead of funneling through hw_breakpoint. So if we figure out how to make
> > > bits of hw_breakpoint work on arm64, then it should just go on top.
> > >
> > > The main pain point for hw_breakpoint is kernel-side {break,watch}points
> > > and I think there are open design questions about how they should work
> > > on arm64, particularly when considering the interaction with user
> > > watchpoints triggering on uaccess routines and the possibility of hitting
> > > a kernel watchpoint in irq context.
> >
> > I see. Our main interest would be break/watchpoints on user addresses
> > firing from both user space and the kernel (uaccess), so at least in irq
> > context.
>
> Interesting. Do other architectures report watchpoint hits on user
> addresses from kernel uaccess? It feels like this might be surprising to
> some users, and it opens up questions about accesses using different virtual
> aliases (e.g. via GUP) or from other entities as well (e.g. firmware,
> KVM guests, DMA).

x86 supports this.
There is that attr.exclude_kernel flag that requires special permissions:
https://elixir.bootlin.com/linux/v5.19-rc7/source/kernel/events/core.c#L12061
https://elixir.bootlin.com/linux/v5.19-rc7/source/kernel/events/core.c#L9323
But if I understand correctly, it only filters out delivery; the HW
breakpoint still fires even if attr.exclude_kernel is set.

We also wanted to relax this permission check somewhat:
https://lore.kernel.org/all/20220601093502.364142-1-elver@google.com/

Yes, if the kernel maps the page at a different virtual address, then
the breakpoint won't fire, I think.
I don't know what the issues are with firmware/KVM.
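
For concreteness, here is a minimal user-space sketch of the kind of
watchpoint meant above (hypothetical helper, error handling omitted;
perf_event_open() has no glibc wrapper, so it is invoked via syscall()):

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Watch one byte at @addr for read/write, from this task, on any CPU. */
static int watch_user_address(void *addr)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.bp_type = HW_BREAKPOINT_RW;
	attr.bp_addr = (unsigned long)addr;
	attr.bp_len = HW_BREAKPOINT_LEN_1;
	/*
	 * exclude_kernel left clear so that, permissions permitting
	 * (CAP_PERFMON or a low perf_event_paranoid), the watchpoint
	 * also fires when the kernel touches @addr via uaccess.
	 */
	attr.exclude_kernel = 0;

	/* pid == 0: current task; cpu == -1: all CPUs. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

Whether and how such uaccess-side hits get delivered is exactly the
policy question being discussed here.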
Marco Elver July 25, 2022, 11 a.m. UTC | #10
On Thu, 21 Jul 2022 at 18:22, Mark Rutland <mark.rutland@arm.com> wrote:
>
> Hi Marco,
>
> [adding Will]
>
> On Mon, Jul 04, 2022 at 05:05:01PM +0200, Marco Elver wrote:
> > Add KUnit test for hw_breakpoint constraints accounting, with various
> > interesting mixes of breakpoint targets (some care was taken to catch
> > interesting corner cases via bug-injection).
> >
> > The test cannot be built as a module because it requires access to
> > hw_breakpoint_slots(), which is not inlinable or exported on all
> > architectures.
> >
> > Signed-off-by: Marco Elver <elver@google.com>
>
> As mentioned on IRC, I'm seeing these tests fail on arm64 when applied atop
> v5.19-rc7:
>
> | TAP version 14
> | 1..1
> |     # Subtest: hw_breakpoint
> |     1..9
> |     ok 1 - test_one_cpu
> |     ok 2 - test_many_cpus
> |     # test_one_task_on_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 3 - test_one_task_on_all_cpus
> |     # test_two_tasks_on_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 4 - test_two_tasks_on_all_cpus
> |     # test_one_task_on_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 5 - test_one_task_on_one_cpu
> |     # test_one_task_mixed: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 6 - test_one_task_mixed
> |     # test_two_tasks_on_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 7 - test_two_tasks_on_one_cpu
> |     # test_two_tasks_on_one_all_cpus: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 8 - test_two_tasks_on_one_all_cpus
> |     # test_task_on_all_and_one_cpu: ASSERTION FAILED at kernel/events/hw_breakpoint_test.c:70
> |     Expected IS_ERR(bp) to be false, but is true
> |     not ok 9 - test_task_on_all_and_one_cpu
> | # hw_breakpoint: pass:2 fail:7 skip:0 total:9
> | # Totals: pass:2 fail:7 skip:0 total:9
>
> ... which seems to be because arm64 currently forbids per-task
> breakpoints/watchpoints in hw_breakpoint_arch_parse(), where we have:
>
>         /*
>          * Disallow per-task kernel breakpoints since these would
>          * complicate the stepping code.
>          */
>         if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
>                 return -EINVAL;
>
> ... which has been the case since day one in commit:
>
>   478fcb2cdb2351dc ("arm64: Debugging support")
>
> I'm not immediately sure what would be necessary to support per-task kernel
> breakpoints, but given a lot of that state is currently per-cpu, I imagine it's
> invasive.

Thanks for investigating - so the test is working as intended. ;-)

However, it's a shame that arm64's support is limited. And what Will
said about possible removal/rework of arm64 hw_breakpoint support
doesn't sound too reassuring.

We will definitely want to revisit arm64's hw_breakpoint support in future.

Thanks,
-- Marco
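
A reproduction note (hedged: exact kunit_tool flags vary by kernel
version). The default UML architecture does not select
HAVE_HW_BREAKPOINT, and test_init() below requires at least two online
CPUs, so run the suite under a multi-CPU qemu target, e.g.:

	./tools/testing/kunit/kunit.py run 'hw_breakpoint*' --arch=x86_64 \
		--kconfig_add CONFIG_HW_BREAKPOINT_KUNIT_TEST=y \
		--qemu_args='-smp 2'

Alternatively, build the test in (CONFIG_KUNIT=y plus the new Kconfig
option below) and read the TAP output from the kernel log, which is the
format quoted in the reports above.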

Patch

diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 8591c180b52b..91a62f566743 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,4 +2,5 @@ 
 obj-y := core.o ring_buffer.o callchain.o
 
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_HW_BREAKPOINT_KUNIT_TEST) += hw_breakpoint_test.o
 obj-$(CONFIG_UPROBES) += uprobes.o
diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c
new file mode 100644
index 000000000000..433c5c45e2a5
--- /dev/null
+++ b/kernel/events/hw_breakpoint_test.c
@@ -0,0 +1,323 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test for hw_breakpoint constraints accounting logic.
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#include <kunit/test.h>
+#include <linux/cpumask.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/kthread.h>
+#include <linux/perf_event.h>
+#include <asm/hw_breakpoint.h>
+
+#define TEST_REQUIRES_BP_SLOTS(test, slots)						\
+	do {										\
+		if ((slots) > get_test_bp_slots()) {					\
+			kunit_skip((test), "Requires breakpoint slots: %d > %d", slots,	\
+				   get_test_bp_slots());				\
+		}									\
+	} while (0)
+
+#define TEST_EXPECT_NOSPC(expr) KUNIT_EXPECT_EQ(test, -ENOSPC, PTR_ERR(expr))
+
+#define MAX_TEST_BREAKPOINTS 512
+
+static char break_vars[MAX_TEST_BREAKPOINTS];
+static struct perf_event *test_bps[MAX_TEST_BREAKPOINTS];
+static struct task_struct *__other_task;
+
+static struct perf_event *register_test_bp(int cpu, struct task_struct *tsk, int idx)
+{
+	struct perf_event_attr attr = {};
+
+	if (WARN_ON(idx < 0 || idx >= MAX_TEST_BREAKPOINTS))
+		return NULL;
+
+	hw_breakpoint_init(&attr);
+	attr.bp_addr = (unsigned long)&break_vars[idx];
+	attr.bp_len = HW_BREAKPOINT_LEN_1;
+	attr.bp_type = HW_BREAKPOINT_RW;
+	return perf_event_create_kernel_counter(&attr, cpu, tsk, NULL, NULL);
+}
+
+static void unregister_test_bp(struct perf_event **bp)
+{
+	if (WARN_ON(IS_ERR(*bp)))
+		return;
+	if (WARN_ON(!*bp))
+		return;
+	unregister_hw_breakpoint(*bp);
+	*bp = NULL;
+}
+
+static int get_test_bp_slots(void)
+{
+	static int slots;
+
+	if (!slots)
+		slots = hw_breakpoint_slots(TYPE_DATA);
+
+	return slots;
+}
+
+static void fill_one_bp_slot(struct kunit *test, int *id, int cpu, struct task_struct *tsk)
+{
+	struct perf_event *bp = register_test_bp(cpu, tsk, *id);
+
+	KUNIT_ASSERT_NOT_NULL(test, bp);
+	KUNIT_ASSERT_FALSE(test, IS_ERR(bp));
+	KUNIT_ASSERT_NULL(test, test_bps[*id]);
+	test_bps[(*id)++] = bp;
+}
+
+/*
+ * Fills up the given @cpu/@tsk with breakpoints, only leaving @skip slots free.
+ *
+ * Returns true if this can be called again, continuing at @id.
+ */
+static bool fill_bp_slots(struct kunit *test, int *id, int cpu, struct task_struct *tsk, int skip)
+{
+	for (int i = 0; i < get_test_bp_slots() - skip; ++i)
+		fill_one_bp_slot(test, id, cpu, tsk);
+
+	return *id + get_test_bp_slots() <= MAX_TEST_BREAKPOINTS;
+}
+
+static int dummy_kthread(void *arg)
+{
+	return 0;
+}
+
+static struct task_struct *get_other_task(struct kunit *test)
+{
+	struct task_struct *tsk;
+
+	if (__other_task)
+		return __other_task;
+
+	tsk = kthread_create(dummy_kthread, NULL, "hw_breakpoint_dummy_task");
+	KUNIT_ASSERT_FALSE(test, IS_ERR(tsk));
+	__other_task = tsk;
+	return __other_task;
+}
+
+static int get_test_cpu(int num)
+{
+	int cpu;
+
+	WARN_ON(num < 0);
+
+	for_each_online_cpu(cpu) {
+		if (num-- <= 0)
+			break;
+	}
+
+	return cpu;
+}
+
+/* ===== Test cases ===== */
+
+static void test_one_cpu(struct kunit *test)
+{
+	int idx = 0;
+
+	fill_bp_slots(test, &idx, get_test_cpu(0), NULL, 0);
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+}
+
+static void test_many_cpus(struct kunit *test)
+{
+	int idx = 0;
+	int cpu;
+
+	/* Test that CPUs are independent. */
+	for_each_online_cpu(cpu) {
+		bool do_continue = fill_bp_slots(test, &idx, cpu, NULL, 0);
+
+		TEST_EXPECT_NOSPC(register_test_bp(cpu, NULL, idx));
+		if (!do_continue)
+			break;
+	}
+}
+
+static void test_one_task_on_all_cpus(struct kunit *test)
+{
+	int idx = 0;
+
+	fill_bp_slots(test, &idx, -1, current, 0);
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+	/* Remove one and adding back CPU-target should work. */
+	unregister_test_bp(&test_bps[0]);
+	fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
+}
+
+static void test_two_tasks_on_all_cpus(struct kunit *test)
+{
+	int idx = 0;
+
+	/* Test that tasks are independent. */
+	fill_bp_slots(test, &idx, -1, current, 0);
+	fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
+
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+	/* Remove one from first task and adding back CPU-target should not work. */
+	unregister_test_bp(&test_bps[0]);
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+}
+
+static void test_one_task_on_one_cpu(struct kunit *test)
+{
+	int idx = 0;
+
+	fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+	/*
+	 * Remove one and adding back CPU-target should work; this case is
+	 * special vs. above because the task's constraints are CPU-dependent.
+	 */
+	unregister_test_bp(&test_bps[0]);
+	fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
+}
+
+static void test_one_task_mixed(struct kunit *test)
+{
+	int idx = 0;
+
+	TEST_REQUIRES_BP_SLOTS(test, 3);
+
+	fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
+	fill_bp_slots(test, &idx, -1, current, 1);
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+
+	/* Transition from CPU-dependent pinned count to CPU-independent. */
+	unregister_test_bp(&test_bps[0]);
+	unregister_test_bp(&test_bps[1]);
+	fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
+	fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+}
+
+static void test_two_tasks_on_one_cpu(struct kunit *test)
+{
+	int idx = 0;
+
+	fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
+	fill_bp_slots(test, &idx, get_test_cpu(0), get_other_task(test), 0);
+
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+	/* Can still create breakpoints on some other CPU. */
+	fill_bp_slots(test, &idx, get_test_cpu(1), NULL, 0);
+}
+
+static void test_two_tasks_on_one_all_cpus(struct kunit *test)
+{
+	int idx = 0;
+
+	fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
+	fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
+
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+	/* Cannot create breakpoints on some other CPU either. */
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
+}
+
+static void test_task_on_all_and_one_cpu(struct kunit *test)
+{
+	int tsk_on_cpu_idx, cpu_idx;
+	int idx = 0;
+
+	TEST_REQUIRES_BP_SLOTS(test, 3);
+
+	fill_bp_slots(test, &idx, -1, current, 2);
+	/* Transitioning from only all CPU breakpoints to mixed. */
+	tsk_on_cpu_idx = idx;
+	fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
+	fill_one_bp_slot(test, &idx, -1, current);
+
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+
+	/* We should still be able to use up another CPU's slots. */
+	cpu_idx = idx;
+	fill_one_bp_slot(test, &idx, get_test_cpu(1), NULL);
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
+
+	/* Transitioning back to task target on all CPUs. */
+	unregister_test_bp(&test_bps[tsk_on_cpu_idx]);
+	/* Still have a CPU target breakpoint in get_test_cpu(1). */
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	/* Remove it and try again. */
+	unregister_test_bp(&test_bps[cpu_idx]);
+	fill_one_bp_slot(test, &idx, -1, current);
+
+	TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
+	TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
+}
+
+static struct kunit_case hw_breakpoint_test_cases[] = {
+	KUNIT_CASE(test_one_cpu),
+	KUNIT_CASE(test_many_cpus),
+	KUNIT_CASE(test_one_task_on_all_cpus),
+	KUNIT_CASE(test_two_tasks_on_all_cpus),
+	KUNIT_CASE(test_one_task_on_one_cpu),
+	KUNIT_CASE(test_one_task_mixed),
+	KUNIT_CASE(test_two_tasks_on_one_cpu),
+	KUNIT_CASE(test_two_tasks_on_one_all_cpus),
+	KUNIT_CASE(test_task_on_all_and_one_cpu),
+	{},
+};
+
+static int test_init(struct kunit *test)
+{
+	/* Most test cases want 2 distinct CPUs. */
+	return num_online_cpus() < 2 ? -EINVAL : 0;
+}
+
+static void test_exit(struct kunit *test)
+{
+	for (int i = 0; i < MAX_TEST_BREAKPOINTS; ++i) {
+		if (test_bps[i])
+			unregister_test_bp(&test_bps[i]);
+	}
+
+	if (__other_task) {
+		kthread_stop(__other_task);
+		__other_task = NULL;
+	}
+}
+
+static struct kunit_suite hw_breakpoint_test_suite = {
+	.name = "hw_breakpoint",
+	.test_cases = hw_breakpoint_test_cases,
+	.init = test_init,
+	.exit = test_exit,
+};
+
+kunit_test_suites(&hw_breakpoint_test_suite);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marco Elver <elver@google.com>");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e24db4bff19..4c87a6edf046 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2513,6 +2513,16 @@  config STACKINIT_KUNIT_TEST
 	  CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF,
 	  or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL.
 
+config HW_BREAKPOINT_KUNIT_TEST
+	bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS
+	depends on HAVE_HW_BREAKPOINT
+	depends on KUNIT=y
+	default KUNIT_ALL_TESTS
+	help
+	  Tests for hw_breakpoint constraints accounting.
+
+	  If unsure, say N.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help