--- zzzz-none-000/linux-3.10.107/kernel/trace/trace_event_perf.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/kernel/trace/trace_event_perf.c 2021-02-04 17:41:59.000000000 +0000 @@ -1,7 +1,7 @@ /* * trace event based perf event profiling/tracing * - * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra + * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra * Copyright (C) 2009-2010 Frederic Weisbecker */ @@ -21,13 +21,47 @@ /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_perm(struct ftrace_event_call *tp_event, +static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { + if (tp_event->perf_perm) { + int ret = tp_event->perf_perm(tp_event, p_event); + if (ret) + return ret; + } + + /* + * We checked and allowed to create parent, + * allow children without checking. + */ + if (p_event->parent) + return 0; + + /* + * It's ok to check current process (owner) permissions in here, + * because code below is called only via perf_event_open syscall. + */ + /* The ftrace function trace is allowed only for root. */ - if (ftrace_event_is_function(tp_event) && - perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + if (ftrace_event_is_function(tp_event)) { + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * We don't allow user space callchains for function trace + * event, due to issues with page faults while tracing page + * fault handler and its overall trickiness nature. + */ + if (!p_event->attr.exclude_callchain_user) + return -EINVAL; + + /* + * Same reason to disable user stack dump as for user space + * callchains above. + */ + if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) + return -EINVAL; + } /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) @@ -49,7 +83,7 @@ return 0; } -static int perf_trace_event_reg(struct ftrace_event_call *tp_event, +static int perf_trace_event_reg(struct trace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; @@ -109,7 +143,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; int i; if (--tp_event->perf_refcount > 0) @@ -138,17 +172,17 @@ static int perf_trace_event_open(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); } static void perf_trace_event_close(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); } -static int perf_trace_event_init(struct ftrace_event_call *tp_event, +static int perf_trace_event_init(struct trace_event_call *tp_event, struct perf_event *p_event) { int ret; @@ -172,8 +206,8 @@ int perf_trace_init(struct perf_event *p_event) { - struct ftrace_event_call *tp_event; - int event_id = p_event->attr.config; + struct trace_event_call *tp_event; + u64 event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); @@ -202,7 +236,7 @@ int perf_trace_add(struct perf_event *p_event, int flags) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; struct hlist_head __percpu *pcpu_list; struct hlist_head *list; @@ -221,13 +255,14 @@ void perf_trace_del(struct perf_event *p_event, int flags) { - struct ftrace_event_call *tp_event = p_event->tp_event; - hlist_del_rcu(&p_event->hlist_entry); + struct trace_event_call *tp_event = p_event->tp_event; + if (!hlist_unhashed(&p_event->hlist_entry)) + hlist_del_rcu(&p_event->hlist_entry); tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } -__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp) +void *perf_trace_buf_prepare(int size, unsigned short type, + struct pt_regs **regs, int *rctxp) { struct trace_entry *entry; unsigned long flags; @@ -236,12 +271,18 @@ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "perf buffer not large enough")) + return NULL; + pc = preempt_count(); *rctxp = perf_swevent_get_recursion_context(); if (*rctxp < 0) return NULL; + if (regs) + *regs = this_cpu_ptr(&__perf_regs[*rctxp]); raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); /* zero the dead bytes from align to not leak stack to user */ @@ -255,6 +296,7 @@ return raw_data; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); +NOKPROBE_SYMBOL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void @@ -266,6 +308,10 @@ struct pt_regs regs; int rctx; + head = this_cpu_ptr(event_function.perf_events); + if (hlist_empty(head)) + return; + #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) @@ -279,8 +325,6 @@ entry->ip = ip; entry->parent_ip = parent_ip; - - head = this_cpu_ptr(event_function.perf_events); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, 1, ®s, head, NULL); @@ -314,7 +358,7 @@ ftrace_function_local_disable(&event->ftrace_ops); } -int perf_ftrace_event_register(struct ftrace_event_call *call, +int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { switch (type) {