[PATCH v1 5/6] perf mutex: Fix thread safety analysis

Ian Rogers posted 6 patches 3 years, 7 months ago
There is a newer version of this series
[PATCH v1 5/6] perf mutex: Fix thread safety analysis
Posted by Ian Rogers 3 years, 7 months ago
Add annotations to describe lock behavior. Add missing unlocks to
perf_sched__replay. Alter hist_iter__top_callback as the thread-safety
analysis cannot follow pointers through local variables.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-sched.c | 8 ++++++++
 tools/perf/builtin-top.c   | 5 +++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 0f52f73be896..a8a765ed28ce 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -658,6 +658,8 @@ static void *thread_func(void *ctx)
 }
 
 static void create_tasks(struct perf_sched *sched)
+	EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
+	EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
 {
 	struct task_desc *task;
 	pthread_attr_t attr;
@@ -687,6 +689,8 @@ static void create_tasks(struct perf_sched *sched)
 }
 
 static void wait_for_tasks(struct perf_sched *sched)
+	EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
+	EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
 {
 	u64 cpu_usage_0, cpu_usage_1;
 	struct task_desc *task;
@@ -738,6 +742,8 @@ static void wait_for_tasks(struct perf_sched *sched)
 }
 
 static void run_one_test(struct perf_sched *sched)
+	EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
+	EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
 {
 	u64 T0, T1, delta, avg_delta, fluct;
 
@@ -3314,6 +3320,8 @@ static int perf_sched__replay(struct perf_sched *sched)
 	for (i = 0; i < sched->replay_repeat; i++)
 		run_one_test(sched);
 
+	mutex_unlock(&sched->start_work_mutex);
+	mutex_unlock(&sched->work_done_wait_mutex);
 	return 0;
 }
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 3757292bfe86..e832f04e3076 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -196,6 +196,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
 					struct hist_entry *he,
 					struct perf_sample *sample,
 					struct evsel *evsel, u64 ip)
+	EXCLUSIVE_LOCKS_REQUIRED(he->hists->lock)
 {
 	struct annotation *notes;
 	struct symbol *sym = he->ms.sym;
@@ -724,13 +725,13 @@ static void *display_thread(void *arg)
 static int hist_iter__top_callback(struct hist_entry_iter *iter,
 				   struct addr_location *al, bool single,
 				   void *arg)
+	EXCLUSIVE_LOCKS_REQUIRED(iter->he->hists->lock)
 {
 	struct perf_top *top = arg;
-	struct hist_entry *he = iter->he;
 	struct evsel *evsel = iter->evsel;
 
 	if (perf_hpp_list.sym && single)
-		perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
+		perf_top__record_precise_ip(top, iter->he, iter->sample, evsel, al->addr);
 
 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
 		     !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
-- 
2.37.1.595.g718a3a8f04-goog
Re: [PATCH v1 5/6] perf mutex: Fix thread safety analysis
Posted by Namhyung Kim 3 years, 7 months ago
On Tue, Aug 16, 2022 at 10:39 PM Ian Rogers <irogers@google.com> wrote:
>
> Add annotations to describe lock behavior. Add missing unlocks to
> perf_sched__replay. Alter hist_iter__top_callback as the thread-safety
> analysis cannot follow pointers through local variables.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-sched.c | 8 ++++++++
>  tools/perf/builtin-top.c   | 5 +++--
>  2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 0f52f73be896..a8a765ed28ce 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -658,6 +658,8 @@ static void *thread_func(void *ctx)
>  }
>
>  static void create_tasks(struct perf_sched *sched)
> +       EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
> +       EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
>  {
>         struct task_desc *task;
>         pthread_attr_t attr;
> @@ -687,6 +689,8 @@ static void create_tasks(struct perf_sched *sched)
>  }
>
>  static void wait_for_tasks(struct perf_sched *sched)
> +       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> +       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
>  {
>         u64 cpu_usage_0, cpu_usage_1;
>         struct task_desc *task;
> @@ -738,6 +742,8 @@ static void wait_for_tasks(struct perf_sched *sched)
>  }
>
>  static void run_one_test(struct perf_sched *sched)
> +       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> +       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
>  {
>         u64 T0, T1, delta, avg_delta, fluct;
>
> @@ -3314,6 +3320,8 @@ static int perf_sched__replay(struct perf_sched *sched)
>         for (i = 0; i < sched->replay_repeat; i++)
>                 run_one_test(sched);
>
> +       mutex_unlock(&sched->start_work_mutex);
> +       mutex_unlock(&sched->work_done_wait_mutex);

But this would wake up the replay tasks and let them burn CPUs unnecessarily.
Maybe we can make them exit at that point.


>         return 0;
>  }
>
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 3757292bfe86..e832f04e3076 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -196,6 +196,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
>                                         struct hist_entry *he,
>                                         struct perf_sample *sample,
>                                         struct evsel *evsel, u64 ip)
> +       EXCLUSIVE_LOCKS_REQUIRED(he->hists->lock)
>  {
>         struct annotation *notes;
>         struct symbol *sym = he->ms.sym;
> @@ -724,13 +725,13 @@ static void *display_thread(void *arg)
>  static int hist_iter__top_callback(struct hist_entry_iter *iter,
>                                    struct addr_location *al, bool single,
>                                    void *arg)
> +       EXCLUSIVE_LOCKS_REQUIRED(iter->he->hists->lock)
>  {
>         struct perf_top *top = arg;
> -       struct hist_entry *he = iter->he;
>         struct evsel *evsel = iter->evsel;
>
>         if (perf_hpp_list.sym && single)
> -               perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
> +               perf_top__record_precise_ip(top, iter->he, iter->sample, evsel, al->addr);
>
>         hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
>                      !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),

Looks like a separate change.

Thanks,
Namhyung


> --
> 2.37.1.595.g718a3a8f04-goog
>
Re: [PATCH v1 5/6] perf mutex: Fix thread safety analysis
Posted by Ian Rogers 3 years, 7 months ago
On Thu, Aug 18, 2022 at 9:41 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Tue, Aug 16, 2022 at 10:39 PM Ian Rogers <irogers@google.com> wrote:
> >
> > Add annotations to describe lock behavior. Add missing unlocks to
> > perf_sched__replay. Alter hist_iter__top_callback as the thread-safety
> > analysis cannot follow pointers through local variables.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/builtin-sched.c | 8 ++++++++
> >  tools/perf/builtin-top.c   | 5 +++--
> >  2 files changed, 11 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > index 0f52f73be896..a8a765ed28ce 100644
> > --- a/tools/perf/builtin-sched.c
> > +++ b/tools/perf/builtin-sched.c
> > @@ -658,6 +658,8 @@ static void *thread_func(void *ctx)
> >  }
> >
> >  static void create_tasks(struct perf_sched *sched)
> > +       EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
> > +       EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
> >  {
> >         struct task_desc *task;
> >         pthread_attr_t attr;
> > @@ -687,6 +689,8 @@ static void create_tasks(struct perf_sched *sched)
> >  }
> >
> >  static void wait_for_tasks(struct perf_sched *sched)
> > +       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> > +       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
> >  {
> >         u64 cpu_usage_0, cpu_usage_1;
> >         struct task_desc *task;
> > @@ -738,6 +742,8 @@ static void wait_for_tasks(struct perf_sched *sched)
> >  }
> >
> >  static void run_one_test(struct perf_sched *sched)
> > +       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> > +       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
> >  {
> >         u64 T0, T1, delta, avg_delta, fluct;
> >
> > @@ -3314,6 +3320,8 @@ static int perf_sched__replay(struct perf_sched *sched)
> >         for (i = 0; i < sched->replay_repeat; i++)
> >                 run_one_test(sched);
> >
> > +       mutex_unlock(&sched->start_work_mutex);
> > +       mutex_unlock(&sched->work_done_wait_mutex);
>
> But this would wake up the replay tasks and let them burn CPUs unnecessarily.
> Maybe we can make them exit at that point.

I think I've stumbled on a can of worms. Why would you spin and not
use a condition variable? Anyway, I can remove this by just saying
this function leaves these locked.

>
> >         return 0;
> >  }
> >
> > diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> > index 3757292bfe86..e832f04e3076 100644
> > --- a/tools/perf/builtin-top.c
> > +++ b/tools/perf/builtin-top.c
> > @@ -196,6 +196,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
> >                                         struct hist_entry *he,
> >                                         struct perf_sample *sample,
> >                                         struct evsel *evsel, u64 ip)
> > +       EXCLUSIVE_LOCKS_REQUIRED(he->hists->lock)
> >  {
> >         struct annotation *notes;
> >         struct symbol *sym = he->ms.sym;
> > @@ -724,13 +725,13 @@ static void *display_thread(void *arg)
> >  static int hist_iter__top_callback(struct hist_entry_iter *iter,
> >                                    struct addr_location *al, bool single,
> >                                    void *arg)
> > +       EXCLUSIVE_LOCKS_REQUIRED(iter->he->hists->lock)
> >  {
> >         struct perf_top *top = arg;
> > -       struct hist_entry *he = iter->he;
> >         struct evsel *evsel = iter->evsel;
> >
> >         if (perf_hpp_list.sym && single)
> > -               perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
> > +               perf_top__record_precise_ip(top, iter->he, iter->sample, evsel, al->addr);
> >
> >         hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
> >                      !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
>
> Looks like a separate change.

This is subtle and relates to how the thread safety pass in clang is
implemented. I'll waffle but the TL;DR is that without this change we
can't enable Wthread-safety so I'd say it is part of the same change.
The waffley bit:

Thread safety checking puts the annotation on to the variable and not
the type. We know that:
const char *x = "hi";
char *y = x;
will give a compile time error on the assignment to y as const-ness
was lost. With the thread safety checks you could have:
char *x PT_GUARDED_BY(lock) = ...;
char *y = x;
And if you used x without holding "lock" you'd get an error but you
wouldn't get the same error from y, even though behind the scenes it
is the same memory. It is the same case here, on entry we know that
"iter->he->hists->lock" is held but the assignment to "he" means clang
doesn't know that "he->hists->lock" is held. This then fails the check
on perf_top__record_precise_ip that the lock be held as we pass "he"
rather than "iter->he".

Thanks,
Ian

> Thanks,
> Namhyung
>
>
> > --
> > 2.37.1.595.g718a3a8f04-goog
> >
Re: [PATCH v1 5/6] perf mutex: Fix thread safety analysis
Posted by Namhyung Kim 3 years, 7 months ago
On Thu, Aug 18, 2022 at 11:17 AM Ian Rogers <irogers@google.com> wrote:
>
> On Thu, Aug 18, 2022 at 9:41 AM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Tue, Aug 16, 2022 at 10:39 PM Ian Rogers <irogers@google.com> wrote:
> > >
> > > Add annotations to describe lock behavior. Add missing unlocks to
> > > perf_sched__replay. Alter hist_iter__top_callback as the thread-safety
> > > analysis cannot follow pointers through local variables.
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > >  tools/perf/builtin-sched.c | 8 ++++++++
> > >  tools/perf/builtin-top.c   | 5 +++--
> > >  2 files changed, 11 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > > index 0f52f73be896..a8a765ed28ce 100644
> > > --- a/tools/perf/builtin-sched.c
> > > +++ b/tools/perf/builtin-sched.c
> > > @@ -658,6 +658,8 @@ static void *thread_func(void *ctx)
> > >  }
> > >
> > >  static void create_tasks(struct perf_sched *sched)
> > > +       EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
> > > +       EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
> > >  {
> > >         struct task_desc *task;
> > >         pthread_attr_t attr;
> > > @@ -687,6 +689,8 @@ static void create_tasks(struct perf_sched *sched)
> > >  }
> > >
> > >  static void wait_for_tasks(struct perf_sched *sched)
> > > +       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> > > +       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
> > >  {
> > >         u64 cpu_usage_0, cpu_usage_1;
> > >         struct task_desc *task;
> > > @@ -738,6 +742,8 @@ static void wait_for_tasks(struct perf_sched *sched)
> > >  }
> > >
> > >  static void run_one_test(struct perf_sched *sched)
> > > +       EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
> > > +       EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
> > >  {
> > >         u64 T0, T1, delta, avg_delta, fluct;
> > >
> > > @@ -3314,6 +3320,8 @@ static int perf_sched__replay(struct perf_sched *sched)
> > >         for (i = 0; i < sched->replay_repeat; i++)
> > >                 run_one_test(sched);
> > >
> > > +       mutex_unlock(&sched->start_work_mutex);
> > > +       mutex_unlock(&sched->work_done_wait_mutex);
> >
> > But this would wake up the replay tasks and let them burn CPUs unnecessarily.
> > Maybe we can make them exit at that point.
>
> I think I've stumbled on a can of worms. Why would you spin and not
> use a condition variable? Anyway, I can remove this by just saying
> this function leaves these locked.

I think you can add a boolean variable and set it before unlocking the
mutexes.  In the thread body, it can check the variable and exit.


>
> >
> > >         return 0;
> > >  }
> > >
> > > diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> > > index 3757292bfe86..e832f04e3076 100644
> > > --- a/tools/perf/builtin-top.c
> > > +++ b/tools/perf/builtin-top.c
> > > @@ -196,6 +196,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
> > >                                         struct hist_entry *he,
> > >                                         struct perf_sample *sample,
> > >                                         struct evsel *evsel, u64 ip)
> > > +       EXCLUSIVE_LOCKS_REQUIRED(he->hists->lock)
> > >  {
> > >         struct annotation *notes;
> > >         struct symbol *sym = he->ms.sym;
> > > @@ -724,13 +725,13 @@ static void *display_thread(void *arg)
> > >  static int hist_iter__top_callback(struct hist_entry_iter *iter,
> > >                                    struct addr_location *al, bool single,
> > >                                    void *arg)
> > > +       EXCLUSIVE_LOCKS_REQUIRED(iter->he->hists->lock)
> > >  {
> > >         struct perf_top *top = arg;
> > > -       struct hist_entry *he = iter->he;
> > >         struct evsel *evsel = iter->evsel;
> > >
> > >         if (perf_hpp_list.sym && single)
> > > -               perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
> > > +               perf_top__record_precise_ip(top, iter->he, iter->sample, evsel, al->addr);
> > >
> > >         hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
> > >                      !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
> >
> > Looks like a separate change.
>
> This is subtle and relates to how the thread safety pass in clang is
> implemented. I'll waffle but the TL;DR is that without this change we
> can't enable Wthread-safety so I'd say it is part of the same change.
> The waffley bit:
>
> Thread safety checking puts the annotation on to the variable and not
> the type. We know that:
> const char *x = "hi";
> char *y = x;
> will give a compile time error on the assignment to y as const-ness
> was lost. With the thread safety checks you could have:
> char *x PT_GUARDED_BY(lock) = ...;
> char *y = x;
> And if you used x without holding "lock" you'd get an error but you
> wouldn't get the same error from y, even though behind the scenes it
> is the same memory. It is the same case here, on entry we know that
> "iter->he->hists->lock" is held but the assignment to "he" means clang
> doesn't know that "he->hists->lock" is held. This then fails the check
> on perf_top__record_precise_ip that the lock be held as we pass "he"
> rather than "iter->he".

Oh, I mean this perf top change can be separated from perf sched.

Thanks,
Namhyung