Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Linux: Kernel

[PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization

 

 

Linux kernel RSS feed   Index | Next | Previous | View Threaded


mhiramat at redhat

Nov 23, 2009, 3:21 PM

Post #1 of 12 (175 views)
Permalink
[PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization

Introduce kprobes jump optimization arch-independent parts.
Kprobes uses breakpoint instruction for interrupting execution flow, on
some architectures, it can be replaced by a jump instruction and
interruption emulation code. This gains kprobs' performance drastically.

To enable this feature, set CONFIG_OPTPROBES=y (default y if the arch
supports OPTPROBE).

Changes in v5:
- Use get_online_cpus()/put_online_cpus() for avoiding text_mutex
deadlock.

Signed-off-by: Masami Hiramatsu <mhiramat [at] redhat>
Cc: Ananth N Mavinakayanahalli <ananth [at] in>
Cc: Ingo Molnar <mingo [at] elte>
Cc: Jim Keniston <jkenisto [at] us>
Cc: Srikar Dronamraju <srikar [at] linux>
Cc: Christoph Hellwig <hch [at] infradead>
Cc: Steven Rostedt <rostedt [at] goodmis>
Cc: Frederic Weisbecker <fweisbec [at] gmail>
Cc: H. Peter Anvin <hpa [at] zytor>
Cc: Anders Kaseorg <andersk [at] ksplice>
Cc: Tim Abbott <tabbott [at] ksplice>
Cc: Andi Kleen <andi [at] firstfloor>
Cc: Jason Baron <jbaron [at] redhat>
Cc: Mathieu Desnoyers <mathieu.desnoyers [at] polymtl>
---

arch/Kconfig | 13 ++
include/linux/kprobes.h | 36 ++++
kernel/kprobes.c | 401 ++++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 404 insertions(+), 46 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 28146cd..86a294a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -57,6 +57,17 @@ config KPROBES
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".

+config OPTPROBES
+ bool "Kprobes jump optimization support (EXPERIMENTAL)"
+ default y
+ depends on KPROBES
+ depends on !PREEMPT
+ depends on HAVE_OPTPROBES
+ select KALLSYMS_ALL
+ help
+ This option will allow kprobes to optimize breakpoint to
+ a jump for reducing its overhead.
+
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool
help
@@ -99,6 +110,8 @@ config HAVE_KPROBES
config HAVE_KRETPROBES
bool

+config HAVE_OPTPROBES
+ bool
#
# An arch should select this if it provides all these things:
#
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1b672f7..aed1f95 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -122,6 +122,11 @@ struct kprobe {
/* Kprobe status flags */
#define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */
#define KPROBE_FLAG_DISABLED 2 /* probe is temporarily disabled */
+#define KPROBE_FLAG_OPTIMIZED 4 /*
+ * probe is really optimized.
+ * NOTE:
+ * this flag is only for optimized_kprobe.
+ */

/* Has this kprobe gone ? */
static inline int kprobe_gone(struct kprobe *p)
@@ -134,6 +139,12 @@ static inline int kprobe_disabled(struct kprobe *p)
{
return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE);
}
+
+/* Is this kprobe really running optimized path ? */
+static inline int kprobe_optimized(struct kprobe *p)
+{
+ return p->flags & KPROBE_FLAG_OPTIMIZED;
+}
/*
* Special probe type that uses setjmp-longjmp type tricks to resume
* execution at a specified entry with a matching prototype corresponding
@@ -249,6 +260,31 @@ extern kprobe_opcode_t *get_insn_slot(void);
extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
extern void kprobes_inc_nmissed_count(struct kprobe *p);

+#ifdef CONFIG_OPTPROBES
+/*
+ * Internal structure for direct jump optimized probe
+ */
+struct optimized_kprobe {
+ struct kprobe kp;
+ struct list_head list; /* list for optimizing queue */
+ struct arch_optimized_insn optinsn;
+};
+
+/* Architecture dependent functions for direct jump optimization */
+extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
+extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
+extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
+extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
+extern int arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
+extern kprobe_opcode_t *get_optinsn_slot(void);
+extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
+extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+ unsigned long addr);
+
+extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
+#endif /* CONFIG_OPTPROBES */
+
/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
void kretprobe_hash_lock(struct task_struct *tsk,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 10d2ed5..15aa797 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@
#include <linux/debugfs.h>
#include <linux/kdebug.h>
#include <linux/memory.h>
+#include <linux/cpu.h>

#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
@@ -301,6 +302,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
__free_insn_slot(&kprobe_insn_slots, slot, dirty);
mutex_unlock(&kprobe_insn_mutex);
}
+#ifdef CONFIG_OPTPROBES
+/* For optimized_kprobe buffer */
+static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
+static struct kprobe_insn_cache kprobe_optinsn_slots = {
+ .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
+ /* .insn_size is initialized later */
+ .nr_garbage = 0,
+};
+/* Get a slot for optimized_kprobe buffer */
+kprobe_opcode_t __kprobes *get_optinsn_slot(void)
+{
+ kprobe_opcode_t *ret = NULL;
+ mutex_lock(&kprobe_optinsn_mutex);
+ ret = __get_insn_slot(&kprobe_optinsn_slots);
+ mutex_unlock(&kprobe_optinsn_mutex);
+ return ret;
+}
+
+void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
+{
+ mutex_lock(&kprobe_optinsn_mutex);
+ __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
+ mutex_unlock(&kprobe_optinsn_mutex);
+}
+#endif
#endif

/* We have preemption disabled.. so it is safe to use __ versions */
@@ -334,20 +360,270 @@ struct kprobe __kprobes *get_kprobe(void *addr)
return NULL;
}

+static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+/* Return true if the kprobe is an aggregator */
+static inline int kprobe_aggrprobe(struct kprobe *p)
+{
+ return p->pre_handler == aggr_pre_handler;
+}
+
+/*
+ * Keep all fields in the kprobe consistent
+ */
+static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+{
+ memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
+ memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+}
+
+#ifdef CONFIG_OPTPROBES
+/*
+ * Call all pre_handler on the list, but ignores its return value.
+ * This must be called from arch-dep optimized caller.
+ */
+void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe *kp;
+
+ list_for_each_entry_rcu(kp, &p->list, list) {
+ if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
+ set_kprobe_instance(kp);
+ kp->pre_handler(kp, regs);
+ }
+ reset_kprobe_instance();
+ }
+}
+
+/* Return true(!0) if the kprobe is ready for optimization. */
+static inline int kprobe_optready(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+ if (kprobe_aggrprobe(p)) {
+ op = container_of(p, struct optimized_kprobe, kp);
+ return arch_prepared_optinsn(&op->optinsn);
+ }
+ return 0;
+}
+
+/* Return an optimized kprobe which replaces instructions including addr. */
+struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+{
+ int i;
+ struct kprobe *p = NULL;
+ struct optimized_kprobe *op;
+ for (i = 0; !p && i < MAX_OPTIMIZED_LENGTH; i++)
+ p = get_kprobe((void *)(addr - i));
+
+ if (p && kprobe_optready(p)) {
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (arch_within_optimized_kprobe(op, addr))
+ return p;
+ }
+ return NULL;
+}
+
+/* Optimization staging list, protected by kprobe_mutex */
+static LIST_HEAD(optimizing_list);
+
+static void kprobe_optimizer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+#define OPTIMIZE_DELAY 5
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+ struct optimized_kprobe *op, *tmp;
+
+ /* Lock modules while optimizing kprobes */
+ mutex_lock(&module_mutex);
+ mutex_lock(&kprobe_mutex);
+ if (kprobes_all_disarmed)
+ goto end;
+
+ /* Wait quiesence period for ensuring all interrupts are done */
+ synchronize_sched();
+
+ get_online_cpus(); /* Use online_cpus while optimizing */
+ mutex_lock(&text_mutex);
+ list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ if (arch_optimize_kprobe(op) < 0)
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ list_del_init(&op->list);
+ }
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
+end:
+ mutex_unlock(&kprobe_mutex);
+ mutex_unlock(&module_mutex);
+}
+
+/* Optimize kprobe if p is ready to be optimized */
+static __kprobes void optimize_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+ /* Check if the kprobe is disabled or not ready for optimization. */
+ if (!kprobe_optready(p) ||
+ (kprobe_disabled(p) || kprobes_all_disarmed))
+ return;
+
+ /* Both of break_handler and post_handler are not supported. */
+ if (p->break_handler || p->post_handler)
+ return;
+
+ op = container_of(p, struct optimized_kprobe, kp);
+
+ /* Check there is no other kprobes at the optimized instructions */
+ if (arch_check_optimized_kprobe(op) < 0)
+ return;
+
+ /* Check if it is already optimized. */
+ if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
+ return;
+
+ op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
+ list_add(&op->list, &optimizing_list);
+ if (!delayed_work_pending(&optimizing_work))
+ schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Unoptimize a kprobe if p is optimized */
+static __kprobes void unoptimize_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+ if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (!list_empty(&op->list))
+ /* Dequeue from the optimization queue */
+ list_del_init(&op->list);
+ else
+ /* Replace jump with break */
+ arch_unoptimize_kprobe(op);
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ }
+}
+
+/* Remove optimized instructions */
+static void __kprobes kill_optimized_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+ op = container_of(p, struct optimized_kprobe, kp);
+ if (!list_empty(&op->list)) {
+ /* Dequeue from the optimization queue */
+ list_del_init(&op->list);
+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+ }
+ /* Don't unoptimize, because the target code will be freed. */
+ arch_remove_optimized_kprobe(op);
+}
+
+/* Try to prepare optimized instructions */
+static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+ op = container_of(p, struct optimized_kprobe, kp);
+ arch_prepare_optimized_kprobe(op);
+}
+
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+ op = container_of(p, struct optimized_kprobe, kp);
+ arch_remove_optimized_kprobe(op);
+ kfree(op);
+}
+
+/* Allocate new optimized_kprobe and try to prepare optimized instructions */
+static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+{
+ struct optimized_kprobe *op;
+
+ op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
+ if (!op)
+ return NULL;
+
+ INIT_LIST_HEAD(&op->list);
+ op->kp.addr = p->addr;
+ arch_prepare_optimized_kprobe(op);
+ return &op->kp;
+}
+
+static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
+
+/*
+ * Prepare an optimized_kprobe and optimize it
+ * NOTE: p must be a normal registered kprobe
+ */
+static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
+{
+ struct kprobe *ap;
+ struct optimized_kprobe *op;
+
+ ap = alloc_aggr_kprobe(p);
+ if (!ap)
+ return;
+
+ op = container_of(ap, struct optimized_kprobe, kp);
+ if (!arch_prepared_optinsn(&op->optinsn)) {
+ /* If failed to setup optimizing, fallback to kprobe */
+ free_aggr_kprobe(ap);
+ return;
+ }
+
+ init_aggr_kprobe(ap, p);
+ optimize_kprobe(ap);
+ return;
+}
+#else /* !CONFIG_OPTPROBES */
+#define get_optimized_kprobe(addr) (NULL)
+#define optimize_kprobe(p) do {} while (0)
+#define unoptimize_kprobe(p) do {} while (0)
+#define kill_optimized_kprobe(p) do {} while (0)
+#define prepare_optimized_kprobe(p) do {} while (0)
+#define try_to_optimize_kprobe(p) do {} while (0)
+
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+ kfree(p);
+}
+
+static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+{
+ return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+}
+#endif /* CONFIG_OPTPROBES */
+
+static void __kprobes __arm_kprobe(struct kprobe *kp)
+{
+ arch_arm_kprobe(kp);
+ optimize_kprobe(kp); /* Try to re-optimize */
+}
+
+static void __kprobes __disarm_kprobe(struct kprobe *kp)
+{
+ unoptimize_kprobe(kp); /* Try to unoptimize */
+ arch_disarm_kprobe(kp);
+}
+
/* Arm a kprobe with text_mutex */
static void __kprobes arm_kprobe(struct kprobe *kp)
{
+ /* optimize_kprobe doesn't need online_cpus. */
mutex_lock(&text_mutex);
- arch_arm_kprobe(kp);
+ __arm_kprobe(kp);
mutex_unlock(&text_mutex);
}

/* Disarm a kprobe with text_mutex */
static void __kprobes disarm_kprobe(struct kprobe *kp)
{
+ get_online_cpus(); /* unoptimize_kprobe requires online_cpus */
mutex_lock(&text_mutex);
- arch_disarm_kprobe(kp);
+ __disarm_kprobe(kp);
mutex_unlock(&text_mutex);
+ put_online_cpus();
}

/*
@@ -416,7 +692,7 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
{
struct kprobe *kp;
- if (p->pre_handler != aggr_pre_handler) {
+ if (!kprobe_aggrprobe(p)) {
p->nmissed++;
} else {
list_for_each_entry_rcu(kp, &p->list, list)
@@ -540,21 +816,16 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
}

/*
- * Keep all fields in the kprobe consistent
- */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
-{
- memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
- memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
-}
-
-/*
* Add the new probe to ap->list. Fail if this is the
* second jprobe at the address - two jprobes can't coexist
*/
static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
{
BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
+
+ if (p->break_handler || p->post_handler)
+ unoptimize_kprobe(ap); /* Fall back to normal kprobe */
+
if (p->break_handler) {
if (ap->break_handler)
return -EEXIST;
@@ -569,7 +840,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
ap->flags &= ~KPROBE_FLAG_DISABLED;
if (!kprobes_all_disarmed)
/* Arm the breakpoint again. */
- arm_kprobe(ap);
+ __arm_kprobe(ap);
}
return 0;
}
@@ -578,12 +849,13 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
* Fill in the required fields of the "manager kprobe". Replace the
* earlier kprobe in the hlist with the manager kprobe
*/
-static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
+ /* Copy p's insn slot to ap */
copy_kprobe(p, ap);
flush_insn_slot(ap);
ap->addr = p->addr;
- ap->flags = p->flags;
+ ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
ap->pre_handler = aggr_pre_handler;
ap->fault_handler = aggr_fault_handler;
/* We don't care the kprobe which has gone. */
@@ -593,8 +865,9 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
ap->break_handler = aggr_break_handler;

INIT_LIST_HEAD(&ap->list);
- list_add_rcu(&p->list, &ap->list);
+ INIT_HLIST_NODE(&ap->hlist);

+ list_add_rcu(&p->list, &ap->list);
hlist_replace_rcu(&p->hlist, &ap->hlist);
}

@@ -608,12 +881,12 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
int ret = 0;
struct kprobe *ap = old_p;

- if (old_p->pre_handler != aggr_pre_handler) {
- /* If old_p is not an aggr_probe, create new aggr_kprobe. */
- ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+ if (!kprobe_aggrprobe(old_p)) {
+ /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
+ ap = alloc_aggr_kprobe(old_p);
if (!ap)
return -ENOMEM;
- add_aggr_kprobe(ap, old_p);
+ init_aggr_kprobe(ap, old_p);
}

if (kprobe_gone(ap)) {
@@ -632,6 +905,9 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
*/
return ret;

+ /* Prepare optimized instructions if possible. */
+ prepare_optimized_kprobe(ap);
+
/*
* Clear gone flag to prevent allocating new slot again, and
* set disabled flag because it is not armed yet.
@@ -640,6 +916,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
| KPROBE_FLAG_DISABLED;
}

+ /* Copy ap's insn slot to p */
copy_kprobe(ap, p);
return add_new_kprobe(ap, p);
}
@@ -789,16 +1066,24 @@ int __kprobes register_kprobe(struct kprobe *p)
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
mutex_lock(&kprobe_mutex);
+ get_online_cpus();
+ mutex_lock(&text_mutex);
+
old_p = get_kprobe(p->addr);
if (old_p) {
+ /* Since this may unoptimize old_p, locking text_mutex. */
ret = register_aggr_kprobe(old_p, p);
goto out;
}

- mutex_lock(&text_mutex);
+ /* Check collision with other optimized kprobes */
+ old_p = get_optimized_kprobe((unsigned long)p->addr);
+ if (unlikely(old_p))
+ unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+
ret = arch_prepare_kprobe(p);
if (ret)
- goto out_unlock_text;
+ goto out;

INIT_HLIST_NODE(&p->hlist);
hlist_add_head_rcu(&p->hlist,
@@ -807,9 +1092,12 @@ int __kprobes register_kprobe(struct kprobe *p)
if (!kprobes_all_disarmed && !kprobe_disabled(p))
arch_arm_kprobe(p);

-out_unlock_text:
- mutex_unlock(&text_mutex);
+ /* Try to optimize kprobe */
+ try_to_optimize_kprobe(p);
+
out:
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
mutex_unlock(&kprobe_mutex);

if (probed_mod)
@@ -831,7 +1119,7 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
return -EINVAL;

if (old_p == p ||
- (old_p->pre_handler == aggr_pre_handler &&
+ (kprobe_aggrprobe(old_p) &&
list_is_singular(&old_p->list))) {
/*
* Only probe on the hash list. Disarm only if kprobes are
@@ -839,8 +1127,13 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
* already have been removed. We save on flushing icache.
*/
if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
- disarm_kprobe(p);
+ disarm_kprobe(old_p);
hlist_del_rcu(&old_p->hlist);
+
+ /* If another kprobe was blocked, optimize it. */
+ old_p = get_optimized_kprobe((unsigned long)p->addr);
+ if (unlikely(old_p))
+ optimize_kprobe(old_p);
} else {
if (p->break_handler && !kprobe_gone(p))
old_p->break_handler = NULL;
@@ -855,8 +1148,13 @@ noclean:
list_del_rcu(&p->list);
if (!kprobe_disabled(old_p)) {
try_to_disable_aggr_kprobe(old_p);
- if (!kprobes_all_disarmed && kprobe_disabled(old_p))
- disarm_kprobe(old_p);
+ if (!kprobes_all_disarmed) {
+ if (kprobe_disabled(old_p))
+ disarm_kprobe(old_p);
+ else
+ /* Try to optimize this probe again */
+ optimize_kprobe(old_p);
+ }
}
}
return 0;
@@ -873,7 +1171,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
old_p = list_entry(p->list.next, struct kprobe, list);
list_del(&p->list);
arch_remove_kprobe(old_p);
- kfree(old_p);
+ free_aggr_kprobe(old_p);
}
}

@@ -1169,7 +1467,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
struct kprobe *kp;

p->flags |= KPROBE_FLAG_GONE;
- if (p->pre_handler == aggr_pre_handler) {
+ if (kprobe_aggrprobe(p)) {
/*
* If this is an aggr_kprobe, we have to list all the
* chained probes and mark them GONE.
@@ -1178,6 +1476,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
kp->flags |= KPROBE_FLAG_GONE;
p->post_handler = NULL;
p->break_handler = NULL;
+ kill_optimized_kprobe(p);
}
/*
* Here, we can remove insn_slot safely, because no thread calls
@@ -1287,6 +1586,11 @@ static int __init init_kprobes(void)
}
}

+#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
+ /* Init kprobe_optinsn_slots */
+ kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
+#endif
+
/* By default, kprobes are armed */
kprobes_all_disarmed = false;

@@ -1305,7 +1609,7 @@ static int __init init_kprobes(void)

#ifdef CONFIG_DEBUG_FS
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
- const char *sym, int offset,char *modname)
+ const char *sym, int offset, char *modname, struct kprobe *pp)
{
char *kprobe_type;

@@ -1315,19 +1619,21 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
kprobe_type = "j";
else
kprobe_type = "k";
+
if (sym)
- seq_printf(pi, "%p %s %s+0x%x %s %s%s\n",
+ seq_printf(pi, "%p %s %s+0x%x %s ",
p->addr, kprobe_type, sym, offset,
- (modname ? modname : " "),
- (kprobe_gone(p) ? "[GONE]" : ""),
- ((kprobe_disabled(p) && !kprobe_gone(p)) ?
- "[DISABLED]" : ""));
+ (modname ? modname : " "));
else
- seq_printf(pi, "%p %s %p %s%s\n",
- p->addr, kprobe_type, p->addr,
- (kprobe_gone(p) ? "[GONE]" : ""),
- ((kprobe_disabled(p) && !kprobe_gone(p)) ?
- "[DISABLED]" : ""));
+ seq_printf(pi, "%p %s %p ",
+ p->addr, kprobe_type, p->addr);
+
+ if (!pp)
+ pp = p;
+ seq_printf(pi, "%s%s%s\n",
+ (kprobe_gone(p) ? "[GONE]" : ""),
+ ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""),
+ (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
}

static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1363,11 +1669,11 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
hlist_for_each_entry_rcu(p, node, head, hlist) {
sym = kallsyms_lookup((unsigned long)p->addr, NULL,
&offset, &modname, namebuf);
- if (p->pre_handler == aggr_pre_handler) {
+ if (kprobe_aggrprobe(p)) {
list_for_each_entry_rcu(kp, &p->list, list)
- report_probe(pi, kp, sym, offset, modname);
+ report_probe(pi, kp, sym, offset, modname, p);
} else
- report_probe(pi, p, sym, offset, modname);
+ report_probe(pi, p, sym, offset, modname, NULL);
}
preempt_enable();
return 0;
@@ -1470,12 +1776,13 @@ static void __kprobes arm_all_kprobes(void)
if (!kprobes_all_disarmed)
goto already_enabled;

+ /* Arm-side doesn't requires online_cpus */
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p))
- arch_arm_kprobe(p);
+ __arm_kprobe(p);
}
mutex_unlock(&text_mutex);

@@ -1502,16 +1809,18 @@ static void __kprobes disarm_all_kprobes(void)

kprobes_all_disarmed = true;
printk(KERN_INFO "Kprobes globally disabled\n");
+ get_online_cpus(); /* Disarming will unoptimize kprobes */
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
- arch_disarm_kprobe(p);
+ __disarm_kprobe(p);
}
}

mutex_unlock(&text_mutex);
+ put_online_cpus();
mutex_unlock(&kprobe_mutex);
/* Allow all currently running kprobes to complete */
synchronize_sched();


--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat [at] redhat
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


fweisbec at gmail

Nov 23, 2009, 6:44 PM

Post #2 of 12 (162 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

On Mon, Nov 23, 2009 at 06:21:41PM -0500, Masami Hiramatsu wrote:
> +config OPTPROBES
> + bool "Kprobes jump optimization support (EXPERIMENTAL)"
> + default y
> + depends on KPROBES
> + depends on !PREEMPT


Why does it depends on !PREEMPT?



> + depends on HAVE_OPTPROBES
> + select KALLSYMS_ALL
> + help
> + This option will allow kprobes to optimize breakpoint to
> + a jump for reducing its overhead.
> +
> config HAVE_EFFICIENT_UNALIGNED_ACCESS
> bool
> help
> @@ -99,6 +110,8 @@ config HAVE_KPROBES
> config HAVE_KRETPROBES
> bool
>
> +config HAVE_OPTPROBES
> + bool
> #
> # An arch should select this if it provides all these things:
> #
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 1b672f7..aed1f95 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -122,6 +122,11 @@ struct kprobe {
> /* Kprobe status flags */
> #define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */
> #define KPROBE_FLAG_DISABLED 2 /* probe is temporarily disabled */
> +#define KPROBE_FLAG_OPTIMIZED 4 /*
> + * probe is really optimized.
> + * NOTE:
> + * this flag is only for optimized_kprobe.
> + */
>
> /* Has this kprobe gone ? */
> static inline int kprobe_gone(struct kprobe *p)
> @@ -134,6 +139,12 @@ static inline int kprobe_disabled(struct kprobe *p)
> {
> return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE);
> }
> +
> +/* Is this kprobe really running optimized path ? */
> +static inline int kprobe_optimized(struct kprobe *p)
> +{
> + return p->flags & KPROBE_FLAG_OPTIMIZED;
> +}
> /*
> * Special probe type that uses setjmp-longjmp type tricks to resume
> * execution at a specified entry with a matching prototype corresponding
> @@ -249,6 +260,31 @@ extern kprobe_opcode_t *get_insn_slot(void);
> extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
> extern void kprobes_inc_nmissed_count(struct kprobe *p);
>
> +#ifdef CONFIG_OPTPROBES
> +/*
> + * Internal structure for direct jump optimized probe
> + */
> +struct optimized_kprobe {
> + struct kprobe kp;
> + struct list_head list; /* list for optimizing queue */
> + struct arch_optimized_insn optinsn;
> +};
> +
> +/* Architecture dependent functions for direct jump optimization */
> +extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
> +extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
> +extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
> +extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
> +extern int arch_optimize_kprobe(struct optimized_kprobe *op);
> +extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
> +extern kprobe_opcode_t *get_optinsn_slot(void);
> +extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
> +extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
> + unsigned long addr);
> +
> +extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
> +#endif /* CONFIG_OPTPROBES */
> +
> /* Get the kprobe at this addr (if any) - called with preemption disabled */
> struct kprobe *get_kprobe(void *addr);
> void kretprobe_hash_lock(struct task_struct *tsk,
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 10d2ed5..15aa797 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -44,6 +44,7 @@
> #include <linux/debugfs.h>
> #include <linux/kdebug.h>
> #include <linux/memory.h>
> +#include <linux/cpu.h>
>
> #include <asm-generic/sections.h>
> #include <asm/cacheflush.h>
> @@ -301,6 +302,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
> __free_insn_slot(&kprobe_insn_slots, slot, dirty);
> mutex_unlock(&kprobe_insn_mutex);
> }
> +#ifdef CONFIG_OPTPROBES
> +/* For optimized_kprobe buffer */
> +static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
> +static struct kprobe_insn_cache kprobe_optinsn_slots = {
> + .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
> + /* .insn_size is initialized later */
> + .nr_garbage = 0,
> +};
> +/* Get a slot for optimized_kprobe buffer */
> +kprobe_opcode_t __kprobes *get_optinsn_slot(void)
> +{
> + kprobe_opcode_t *ret = NULL;
> + mutex_lock(&kprobe_optinsn_mutex);
> + ret = __get_insn_slot(&kprobe_optinsn_slots);
> + mutex_unlock(&kprobe_optinsn_mutex);
> + return ret;
> +}



Just a small nano-neat: could you add a line between variable
declarations and the rest? And also just before the return?
It makes the code a bit easier to review.



> +
> +void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
> +{
> + mutex_lock(&kprobe_optinsn_mutex);
> + __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
> + mutex_unlock(&kprobe_optinsn_mutex);
> +}
> +#endif
> #endif
>
> /* We have preemption disabled.. so it is safe to use __ versions */
> @@ -334,20 +360,270 @@ struct kprobe __kprobes *get_kprobe(void *addr)
> return NULL;
> }
>
> +static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
> +
> +/* Return true if the kprobe is an aggregator */
> +static inline int kprobe_aggrprobe(struct kprobe *p)
> +{
> + return p->pre_handler == aggr_pre_handler;
> +}
> +
> +/*
> + * Keep all fields in the kprobe consistent
> + */
> +static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
> +{
> + memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
> + memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
> +}
> +
> +#ifdef CONFIG_OPTPROBES
> +/*
> + * Call all pre_handler on the list, but ignores its return value.
> + * This must be called from arch-dep optimized caller.
> + */
> +void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
> +{
> + struct kprobe *kp;
> +
> + list_for_each_entry_rcu(kp, &p->list, list) {
> + if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
> + set_kprobe_instance(kp);
> + kp->pre_handler(kp, regs);
> + }
> + reset_kprobe_instance();
> + }
> +}
> +
> +/* Return true(!0) if the kprobe is ready for optimization. */
> +static inline int kprobe_optready(struct kprobe *p)
> +{
> + struct optimized_kprobe *op;
> + if (kprobe_aggrprobe(p)) {
> + op = container_of(p, struct optimized_kprobe, kp);
> + return arch_prepared_optinsn(&op->optinsn);
> + }
> + return 0;
> +}
> +
> +/* Return an optimized kprobe which replaces instructions including addr. */
> +struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
> +{
> + int i;
> + struct kprobe *p = NULL;
> + struct optimized_kprobe *op;
> + for (i = 0; !p && i < MAX_OPTIMIZED_LENGTH; i++)
> + p = get_kprobe((void *)(addr - i));
> +
> + if (p && kprobe_optready(p)) {
> + op = container_of(p, struct optimized_kprobe, kp);
> + if (arch_within_optimized_kprobe(op, addr))
> + return p;
> + }
> + return NULL;
> +}
> +
> +/* Optimization staging list, protected by kprobe_mutex */
> +static LIST_HEAD(optimizing_list);
> +
> +static void kprobe_optimizer(struct work_struct *work);
> +static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
> +#define OPTIMIZE_DELAY 5
> +
> +/* Kprobe jump optimizer */
> +static __kprobes void kprobe_optimizer(struct work_struct *work)
> +{
> + struct optimized_kprobe *op, *tmp;
> +
> + /* Lock modules while optimizing kprobes */
> + mutex_lock(&module_mutex);
> + mutex_lock(&kprobe_mutex);
> + if (kprobes_all_disarmed)
> + goto end;
> +
> + /* Wait quiesence period for ensuring all interrupts are done */
> + synchronize_sched();



It's not clear to me why you are doing that.
Is this waiting for pending int 3 kprobes handlers
to complete? If so, why, and what does that prevent?

Also, why is it a delayed work? I'm not sure what we are
waiting for here.


> +
> + get_online_cpus(); /* Use online_cpus while optimizing */



And this comment doesn't tell us much what this brings us.
The changelog tells it stands to avoid a text_mutex deadlock.
I'm not sure why we would deadlock without it.

Again, I think this dance with live patching protected
by int 3 only, which patching is in turn a necessary
stage before, is a complicated sequence that could be
simplified by choosing only one patching in stop_machine()
time.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


fweisbec at gmail

Nov 23, 2009, 7:31 PM

Post #3 of 12 (168 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

On Tue, Nov 24, 2009 at 03:44:19AM +0100, Frederic Weisbecker wrote:
> On Mon, Nov 23, 2009 at 06:21:41PM -0500, Masami Hiramatsu wrote:
> > +static void kprobe_optimizer(struct work_struct *work);
> > +static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
> > +#define OPTIMIZE_DELAY 5
> > +
> > +/* Kprobe jump optimizer */
> > +static __kprobes void kprobe_optimizer(struct work_struct *work)
> > +{
> > + struct optimized_kprobe *op, *tmp;
> > +
> > + /* Lock modules while optimizing kprobes */
> > + mutex_lock(&module_mutex);
> > + mutex_lock(&kprobe_mutex);
> > + if (kprobes_all_disarmed)
> > + goto end;
> > +
> > + /* Wait quiesence period for ensuring all interrupts are done */
> > + synchronize_sched();
>
>
>
> It's not clear to me why you are doing that.
> Is this waiting for pending int 3 kprobes handlers
> to complete? If so, why, and what does that prevent?


I _might_ have understood.
You have set up the optimized flags, then you wait for
any old-style int 3 kprobes to complete and route
to detour buffer so that you can patch the jump
safely in the dead code? (and finish with first byte
by patching the int 3 itself)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mhiramat at redhat

Nov 24, 2009, 7:34 AM

Post #4 of 12 (163 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

Hi Frederic,

Frederic Weisbecker wrote:
> On Mon, Nov 23, 2009 at 06:21:41PM -0500, Masami Hiramatsu wrote:
>> +config OPTPROBES
>> + bool "Kprobes jump optimization support (EXPERIMENTAL)"
>> + default y
>> + depends on KPROBES
>> + depends on !PREEMPT
>
>
> Why does it depends on !PREEMPT?

Oh, because it has not supported preemptive kernel yet.
(I'd like to tell you why in another mail)

>> @@ -301,6 +302,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
>> __free_insn_slot(&kprobe_insn_slots, slot, dirty);
>> mutex_unlock(&kprobe_insn_mutex);
>> }
>> +#ifdef CONFIG_OPTPROBES
>> +/* For optimized_kprobe buffer */
>> +static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
>> +static struct kprobe_insn_cache kprobe_optinsn_slots = {
>> + .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
>> + /* .insn_size is initialized later */
>> + .nr_garbage = 0,
>> +};
>> +/* Get a slot for optimized_kprobe buffer */
>> +kprobe_opcode_t __kprobes *get_optinsn_slot(void)
>> +{
>> + kprobe_opcode_t *ret = NULL;
>> + mutex_lock(&kprobe_optinsn_mutex);
>> + ret = __get_insn_slot(&kprobe_optinsn_slots);
>> + mutex_unlock(&kprobe_optinsn_mutex);
>> + return ret;
>> +}
>
>
>
> Just a small nano-neat: could you add a line between variable
> declarations and the rest? And also just before the return?
> It makes the code a bit easier to review.

Sure :-)

>> +static void kprobe_optimizer(struct work_struct *work);
>> +static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
>> +#define OPTIMIZE_DELAY 5
>> +
>> +/* Kprobe jump optimizer */
>> +static __kprobes void kprobe_optimizer(struct work_struct *work)
>> +{
>> + struct optimized_kprobe *op, *tmp;
>> +
>> + /* Lock modules while optimizing kprobes */
>> + mutex_lock(&module_mutex);
>> + mutex_lock(&kprobe_mutex);
>> + if (kprobes_all_disarmed)
>> + goto end;
>> +
>> + /* Wait quiesence period for ensuring all interrupts are done */
>> + synchronize_sched();
>
>
>
> It's not clear to me why you are doing that.
> Is this waiting for pending int 3 kprobes handlers
> to complete? If so, why, and what does that prevent?
>
> Also, why is it a delayed work? I'm not sure what we are
> waiting for here.
[...]
> Again, I think this dance with live patching protected
> by int 3 only, which patching is in turn a necessary
> stage before, is a complicated sequence that could be
> simplified by choosing only one patching in stop_machine()
> time.

There is a reason why we have to wait here and it's excuse
why it hasn't supported preemption yet too, I'll tell you
in next mail :-)

>> +
>> + get_online_cpus(); /* Use online_cpus while optimizing */
>
>
>
> And this comment doesn't tell us much what this brings us.
> The changelog tells it stands to avoid a text_mutex deadlock.
> I'm not sure why we would deadlock without it.

As Mathieu and I discussed on LKML (http://lkml.org/lkml/2009/11/21/187)
text_mutex will be locked on the way of cpu-hotplug.
Since kprobes locks text_mutex too and stop_machine() refers online_cpus,
it will cause a dead-lock. So, I decided to use get_online_cpus() to
locking hotplug while optimizing/unoptimizng.

Thank you,

--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat [at] redhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mhiramat at redhat

Nov 24, 2009, 7:34 AM

Post #5 of 12 (162 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 03:44:19AM +0100, Frederic Weisbecker wrote:
>> On Mon, Nov 23, 2009 at 06:21:41PM -0500, Masami Hiramatsu wrote:
>>> +static void kprobe_optimizer(struct work_struct *work);
>>> +static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
>>> +#define OPTIMIZE_DELAY 5
>>> +
>>> +/* Kprobe jump optimizer */
>>> +static __kprobes void kprobe_optimizer(struct work_struct *work)
>>> +{
>>> + struct optimized_kprobe *op, *tmp;
>>> +
>>> + /* Lock modules while optimizing kprobes */
>>> + mutex_lock(&module_mutex);
>>> + mutex_lock(&kprobe_mutex);
>>> + if (kprobes_all_disarmed)
>>> + goto end;
>>> +
>>> + /* Wait quiesence period for ensuring all interrupts are done */
>>> + synchronize_sched();
>>
>>
>>
>> It's not clear to me why you are doing that.
>> Is this waiting for pending int 3 kprobes handlers
>> to complete? If so, why, and what does that prevent?
>
>
> I _might_ have understood.
> You have set up the optimized flags, then you wait for
> any old-style int 3 kprobes to complete and route
> to detour buffer so that you can patch the jump
> safely in the dead code? (and finish with first byte
> by patching the int 3 itself)
>

Yeah, you might get almost correct answer.
The reason why we have to wait scheduling on all processors
is that this code may modify N instructions (not a single
instruction). This means, there is a chance that 2nd to nth
instructions are interrupted on other cpus when we start
code modifying.

Please imagine that 2nd instruction is interrupted and
stop_machine() replaces the 2nd instruction with jump
*address* while running interrupt handler. When the interrupt
returns to original address, there is no valid instructions
and it causes unexpected result.

To avoid this situation, we have to wait a scheduler quiescent
state on all cpus, because it also ensure that all current
interruption are done.

This also excuses why we don't need to wait when unoptimizing
and why it has not supported preemptive kernel yet.

In unoptimizing case, since there is just a single instruction
(jump), there is no nth instruction which can be interrupted.
Thus we can just use a stop_machine(). :-)

On the preemptive kernel, waiting scheduling is not work as we
see on non-preemptive kernel. Since processes can be preempted
in interruption, we can't ensure that the current running
interruption is done. (I assume that a pair of freeze_processes
and thaw_processes may possibly ensure that, or maybe we can
share some stack rewinding code with ksplice.)
So it depends on !PREEMPT.

Thank you,

--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat [at] redhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


fweisbec at gmail

Nov 24, 2009, 11:45 AM

Post #6 of 12 (160 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

On Tue, Nov 24, 2009 at 10:34:08AM -0500, Masami Hiramatsu wrote:
> > And this comment doesn't tell us much what this brings us.
> > The changelog tells it stands to avoid a text_mutex deadlock.
> > I'm not sure why we would deadlock without it.
>
> As Mathieu and I discussed on LKML (http://lkml.org/lkml/2009/11/21/187)
> text_mutex will be locked on the way of cpu-hotplug.
> Since kprobes locks text_mutex too and stop_machine() refers online_cpus,
> it will cause a dead-lock. So, I decided to use get_online_cpus() to
> locking hotplug while optimizing/unoptimizng.


Ah ok :)
Could you add a comment in the code that explains it?

Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


fweisbec at gmail

Nov 24, 2009, 12:14 PM

Post #7 of 12 (156 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

On Tue, Nov 24, 2009 at 10:34:16AM -0500, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
> > I _might_ have understood.
> > You have set up the optimized flags, then you wait for
> > any old-style int 3 kprobes to complete and route
> > to detour buffer so that you can patch the jump
> > safely in the dead code? (and finish with first byte
> > by patching the int 3 itself)
> >
>
> Yeah, you might get almost correct answer.
> The reason why we have to wait scheduling on all processors
> is that this code may modify N instructions (not a single
> instruction). This means, there is a chance that 2nd to nth
> instructions are interrupted on other cpus when we start
> code modifying.


Aaah ok!

In this case, you probably just need the synchronize_sched()
thing. The delayed work looks unnecessary.


> Please imagine that 2nd instruction is interrupted and
> stop_machine() replaces the 2nd instruction with jump
> *address* while running interrupt handler. When the interrupt
> returns to original address, there is no valid instructions
> and it causes unexpected result.


Yeah.


>
> To avoid this situation, we have to wait a scheduler quiescent
> state on all cpus, because it also ensure that all current
> interruption are done.


Ok.


> This also excuses why we don't need to wait when unoptimizing
> and why it has not supported preemptive kernel yet.


I see...so the non-preemptible kernel requirement looks
hard to workaround :-s


> In unoptimizing case, since there is just a single instruction
> (jump), there is no nth instruction which can be interrupted.
> Thus we can just use a stop_machine(). :-)


Ok.


>
> On the preemptive kernel, waiting scheduling is not work as we
> see on non-preemptive kernel. Since processes can be preempted
> in interruption, we can't ensure that the current running
> interruption is done. (I assume that a pair of freeze_processes
> and thaw_processes may possibly ensure that, or maybe we can
> share some stack rewinding code with ksplice.)
> So it depends on !PREEMPT.



Right.
However using freeze_processes() and thaw_processes() would be
probably too costly and it's not a guarantee that every processes
go to the refrigerator() :-), because some tasks are not freezable,
like the kernel threads by default if I remember well, unless they
call set_freezable(). That's a pity, we would just have needed
to set __kprobe in refrigerator().


PS: hmm btw I remember about a patch that
tagged refrigerator() as __cold but it looks like it hasn't been
applied....

Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mhiramat at redhat

Nov 24, 2009, 12:59 PM

Post #8 of 12 (156 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 10:34:16AM -0500, Masami Hiramatsu wrote:
>> Frederic Weisbecker wrote:
>>> I _might_ have understood.
>>> You have set up the optimized flags, then you wait for
>>> any old-style int 3 kprobes to complete and route
>>> to detour buffer so that you can patch the jump
>>> safely in the dead code? (and finish with first byte
>>> by patching the int 3 itself)
>>>
>>
>> Yeah, you might get almost correct answer.
>> The reason why we have to wait scheduling on all processors
>> is that this code may modify N instructions (not a single
>> instruction). This means, there is a chance that 2nd to nth
>> instructions are interrupted on other cpus when we start
>> code modifying.
>
>
> Aaah ok!
>
> In this case, you probably just need the synchronize_sched()
> thing. The delayed work looks unnecessary.

Yeah, the delayed work is for speeding up batch registration
which kprobes are already supported. Sometimes ~100 probes
can be set via batch registration I/F.

>> Please imagine that 2nd instruction is interrupted and
>> stop_machine() replaces the 2nd instruction with jump
>> *address* while running interrupt handler. When the interrupt
>> returns to original address, there is no valid instructions
>> and it causes unexpected result.
>
>
> Yeah.
>
>
>>
>> To avoid this situation, we have to wait a scheduler quiescent
>> state on all cpus, because it also ensure that all current
>> interruption are done.
>
>
> Ok.
>
>
>> This also excuses why we don't need to wait when unoptimizing
>> and why it has not supported preemptive kernel yet.
>
>
> I see...so the non-preemptible kernel requirement looks
> hard to workaround :-s

It's the next challenge I think :-)
Even though, kprobes itself still work on preemptive kernel,
so we don't lose any functionality.

>> In unoptimizing case, since there is just a single instruction
>> (jump), there is no nth instruction which can be interrupted.
>> Thus we can just use a stop_machine(). :-)
>
>
> Ok.
>
>
>>
>> On the preemptive kernel, waiting scheduling is not work as we
>> see on non-preemptive kernel. Since processes can be preempted
>> in interruption, we can't ensure that the current running
>> interruption is done. (I assume that a pair of freeze_processes
>> and thaw_processes may possibly ensure that, or maybe we can
>> share some stack rewinding code with ksplice.)
>> So it depends on !PREEMPT.
>
>
>
> Right.
> However using freeze_processes() and thaw_processes() would be
> probably too costly and it's not a guarantee that every processes
> go to the refrigerator() :-), because some tasks are not freezable,
> like the kernel threads by default if I remember well, unless they
> call set_freezable(). That's a pity, we would just have needed
> to set __kprobe in refrigerator().

Ah, right. Even though, we still have an option of ksplice code.

Thank you,

> PS: hmm btw I remember about a patch that
> tagged refrigerator() as __cold but it looks like it hasn't been
> applied....
>
> Thanks.
>

--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat [at] redhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


hpa at zytor

Nov 24, 2009, 1:08 PM

Post #9 of 12 (157 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

On 11/24/2009 12:14 PM, Frederic Weisbecker wrote:
>
> PS: hmm btw I remember about a patch that
> tagged refrigerator() as __cold but it looks like it hasn't been
> applied....
>

Groan! That hurt!

-hpa

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mhiramat at redhat

Nov 24, 2009, 1:15 PM

Post #10 of 12 (155 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

Frederic Weisbecker wrote:
> On Tue, Nov 24, 2009 at 10:34:08AM -0500, Masami Hiramatsu wrote:
>>> And this comment doesn't tell us much what this brings us.
>>> The changelog tells it stands to avoid a text_mutex deadlock.
>>> I'm not sure why we would deadlock without it.
>>
>> As Mathieu and I discussed on LKML (http://lkml.org/lkml/2009/11/21/187)
>> text_mutex will be locked on the way of cpu-hotplug.
>> Since kprobes locks text_mutex too and stop_machine() refers online_cpus,
>> it will cause a dead-lock. So, I decided to use get_online_cpus() to
>> locking hotplug while optimizing/unoptimizng.
>
>
> Ah ok :)
> Could you add a comment in the code that explains it?

Sure, of course :-)


--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat [at] redhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


rostedt at goodmis

Nov 25, 2009, 1:08 PM

Post #11 of 12 (154 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

On Tue, 2009-11-24 at 15:59 -0500, Masami Hiramatsu wrote:

> > I see...so the non-preemptible kernel requirement looks
> > hard to workaround :-s
>
> It's the next challenge I think :-)
> Even though, kprobes itself still work on preemptive kernel,
> so we don't lose any functionality.

>From kstop_machine, we could search all tasks to see if any are about to
resume in the modified location. If there is, we could either

1) insert a normal kprobe
2) modify the return address of the task to jump to some trampoline to
finish the work and return to the code spot with a direct jump.

#2 is kind of nasty but seems like a fun thing to implement ;-)

-- Steve


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mhiramat at redhat

Nov 25, 2009, 1:30 PM

Post #12 of 12 (149 views)
Permalink
Re: [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization [In reply to]

Steven Rostedt wrote:
> On Tue, 2009-11-24 at 15:59 -0500, Masami Hiramatsu wrote:
>
>>> I see...so the non-preemptible kernel requirement looks
>>> hard to workaround :-s
>>
>> It's the next challenge I think :-)
>> Even though, kprobes itself still work on preemptive kernel,
>> so we don't lose any functionality.
>
>> From kstop_machine, we could search all tasks to see if any are about to
> resume in the modified location. If there is, we could either
>
> 1) insert a normal kprobe
> 2) modify the return address of the task to jump to some trampoline to
> finish the work and return to the code spot with a direct jump.
>
> #2 is kind of nasty but seems like a fun thing to implement ;-)

Sure, anyway, a normal kprobe is already inserted, so we also can
just wait until all tasks don't resume to the spot :-)
(that's another reason why it uses delayed work for optimization.
we can try it again and again.)
And I think, that code will be shared with ksplice too.

Thank you,


--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat [at] redhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Linux kernel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.