Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Linux: Kernel

[PATCH 2/2] trace_workqueue: add refcnt to struct cpu_workqueue_stats

 

 

Linux kernel RSS feed   Index | Next | Previous | View Threaded


lizf at cn

Jul 6, 2009, 1:10 AM

Post #1 of 4 (242 views)
Permalink
[PATCH 2/2] trace_workqueue: add refcnt to struct cpu_workqueue_stats

From: Lai Jiangshan <laijs [at] cn>

The stat entries can be freed while the stat file is being read.
Worse, the pointer can be freed immediately after it is returned
from workqueue_stat_start/next().

Add a refcnt to struct cpu_workqueue_stats to avoid use-after-free.

Signed-off-by: Lai Jiangshan <laijs [at] cn>
Signed-off-by: Li Zefan <lizf [at] cn>
---
kernel/trace/trace_workqueue.c | 32 ++++++++++++++++++++++++++------
1 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 97fcea4..40cafb0 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
#include <trace/events/workqueue.h>
#include <linux/list.h>
#include <linux/percpu.h>
+#include <linux/kref.h>
#include "trace_stat.h"
#include "trace.h"

@@ -16,6 +17,7 @@
/* A cpu workqueue thread */
struct cpu_workqueue_stats {
struct list_head list;
+ struct kref kref;
int cpu;
pid_t pid;
/* Can be inserted from interrupt or user context, need to be atomic */
@@ -39,6 +41,11 @@ struct workqueue_global_stats {
static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))

+static void cpu_workqueue_stat_free(struct kref *kref)
+{
+ kfree(container_of(kref, struct cpu_workqueue_stats, kref));
+}
+
/* Insertion of a work */
static void
probe_workqueue_insertion(struct task_struct *wq_thread,
@@ -96,8 +103,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
return;
}
INIT_LIST_HEAD(&cws->list);
+ kref_init(&cws->kref);
cws->cpu = cpu;
-
cws->pid = wq_thread->pid;

spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
@@ -118,7 +125,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
list) {
if (node->pid == wq_thread->pid) {
list_del(&node->list);
- kfree(node);
+ kref_put(&node->kref, cpu_workqueue_stat_free);
goto found;
}
}
@@ -137,9 +144,11 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)

spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);

- if (!list_empty(&workqueue_cpu_stat(cpu)->list))
+ if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
struct cpu_workqueue_stats, list);
+ kref_get(&ret->kref);
+ }

spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

@@ -162,9 +171,9 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
static void *workqueue_stat_next(void *prev, int idx)
{
struct cpu_workqueue_stats *prev_cws = prev;
+ struct cpu_workqueue_stats *ret;
int cpu = prev_cws->cpu;
unsigned long flags;
- void *ret = NULL;

spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
@@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
return NULL;
} while (!(ret = workqueue_stat_start_cpu(cpu)));
return ret;
+ } else {
+ ret = list_entry(prev_cws->list.next,
+ struct cpu_workqueue_stats, list);
+ kref_get(&ret->kref);
}
spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

- return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
- list);
+ return ret;
}

static int workqueue_stat_show(struct seq_file *s, void *p)
@@ -203,6 +215,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
return 0;
}

+static void workqueue_stat_release(void *stat)
+{
+ struct cpu_workqueue_stats *node = stat;
+
+ kref_put(&node->kref, cpu_workqueue_stat_free);
+}
+
static int workqueue_stat_headers(struct seq_file *s)
{
seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
@@ -215,6 +234,7 @@ struct tracer_stat workqueue_stats __read_mostly = {
.stat_start = workqueue_stat_start,
.stat_next = workqueue_stat_next,
.stat_show = workqueue_stat_show,
+ .stat_release = workqueue_stat_release,
.stat_headers = workqueue_stat_headers
};

--
1.5.4.rc3





--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


lizf at cn

Jul 6, 2009, 11:07 PM

Post #2 of 4 (210 views)
Permalink
Re: [PATCH 2/2] trace_workqueue: add refcnt to struct cpu_workqueue_stats [In reply to]

> The stat entries can be freed when the stat file is being read.
> The worse is, the ptr can be freed immediately after it's returned
> from workqueue_stat_start/next().
>
> Add a refcnt to struct cpu_workqueue_stats to avoid use-after-free.
>
> Signed-off-by: Lai Jiangshan <laijs [at] cn>
> Signed-off-by: Li Zefan <lizf [at] cn>
> ---
...
> @@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
> return NULL;
> } while (!(ret = workqueue_stat_start_cpu(cpu)));
> return ret;
> + } else {
> + ret = list_entry(prev_cws->list.next,
> + struct cpu_workqueue_stats, list);

I just realized accessing prev_cws->list.next can be invalid!

We can fix it by using list_del_init() to delete cws->list in
probe_workqueue_destruction(), but then if the race happened,
the next time stat_next() is called, NULL will be returned.
I guess this is Ok, since the race is rare.

(I never liked the design of trace_stat. Fortunately, we'll
probably switch to perfcounter for this kind of statistics
reporting.)

> + kref_get(&ret->kref);
> }
> spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
> - return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
> - list);
> + return ret;
> }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


fweisbec at gmail

Jul 7, 2009, 1:07 AM

Post #3 of 4 (217 views)
Permalink
Re: [PATCH 2/2] trace_workqueue: add refcnt to struct cpu_workqueue_stats [In reply to]

On Tue, Jul 07, 2009 at 02:07:35PM +0800, Li Zefan wrote:
> > The stat entries can be freed when the stat file is being read.
> > The worse is, the ptr can be freed immediately after it's returned
> > from workqueue_stat_start/next().
> >
> > Add a refcnt to struct cpu_workqueue_stats to avoid use-after-free.
> >
> > Signed-off-by: Lai Jiangshan <laijs [at] cn>
> > Signed-off-by: Li Zefan <lizf [at] cn>
> > ---
> ...
> > @@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
> > return NULL;
> > } while (!(ret = workqueue_stat_start_cpu(cpu)));
> > return ret;
> > + } else {
> > + ret = list_entry(prev_cws->list.next,
> > + struct cpu_workqueue_stats, list);
>
> I just realized accessing prev_cws->list.next can be invalid!
>
> We can fix it by using list_del_init() to delete cws->list in
> probe_workqueue_destruction(), but then if the race happened,
> the next time stat_next() is called, NULL will be returned.
> I guess this is Ok, since the race is rare.


If you ensure the kref_get/put are under the
workqueue_cpu_stat(cpu)->lock, it should be fine, right?


> (I never like the design of trace_stat..Fortunately we'll
> probably switch to perfcounter for this kind of statistics
> reporting)


I don't like its design either. I wrote it specifically for
the branch tracer and didn't think about free-able events :-/


>
> > + kref_get(&ret->kref);
> > }
> > spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> >
> > - return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
> > - list);
> > + return ret;
> > }
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


lizf at cn

Jul 7, 2009, 1:23 AM

Post #4 of 4 (220 views)
Permalink
Re: [PATCH 2/2] trace_workqueue: add refcnt to struct cpu_workqueue_stats [In reply to]

Frederic Weisbecker wrote:
> On Tue, Jul 07, 2009 at 02:07:35PM +0800, Li Zefan wrote:
>>> The stat entries can be freed when the stat file is being read.
>>> The worse is, the ptr can be freed immediately after it's returned
>>> from workqueue_stat_start/next().
>>>
>>> Add a refcnt to struct cpu_workqueue_stats to avoid use-after-free.
>>>
>>> Signed-off-by: Lai Jiangshan <laijs [at] cn>
>>> Signed-off-by: Li Zefan <lizf [at] cn>
>>> ---
>> ...
>>> @@ -175,11 +184,14 @@ static void *workqueue_stat_next(void *prev, int idx)
>>> return NULL;
>>> } while (!(ret = workqueue_stat_start_cpu(cpu)));
>>> return ret;
>>> + } else {
>>> + ret = list_entry(prev_cws->list.next,
>>> + struct cpu_workqueue_stats, list);
>> I just realized accessing prev_cws->list.next can be invalid!
>>
>> We can fix it by using list_del_init() to delete cws->list in
>> probe_workqueue_destruction(), but then if the race happened,
>> the next time stat_next() is called, NULL will be returned.
>> I guess this is Ok, since the race is rare.
>
>
> If you ensure the kref_get/put are under the
> workqueue_cpu_stat(cpu)->lock, it should be fine, right?
>

Unfortunately no.

It's safe to dereference prev_cws, but not safe to retrieve
prev_cws->list.next.

Suppose: head->n1->n2

T1 (reader)                          T2 (workqueue destruction)
---------------                      -------------------
stat_start()
  -> return n1
                                     list_del(n1)
                                       -> n1->list.next = LIST_POISON1;
stat_next()
  -> prev = n1
  -> list_entry(prev->list.next) !!!

You see why it's not safe..

>
>> (I never like the design of trace_stat..Fortunately we'll
>> probably switch to perfcounter for this kind of statistics
>> reporting)
>
>
> I don't like its design either. I wrote it specifically for
> the branch tracer and didn't think about free-able events :-/
>

Yeah, for free-able events it's buggy to use trace_stat.
A similar bug exists in ksym_tracer.

Another way to fix it is to stop using trace_stat and use seq_file
directly. The entries don't need to be sorted anyway.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Linux kernel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.