--- zzzz-none-000/linux-3.10.107/include/linux/perf_event.h	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/include/linux/perf_event.h	2021-02-04 17:41:59.000000000 +0000
@@ -48,9 +48,12 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
+#include
 #include
 
 struct perf_callchain_entry {
@@ -64,25 +67,6 @@
 };
 
 /*
- * single taken branch record layout:
- *
- *      from: source instruction (may not always be a branch insn)
- *        to: branch target
- *   mispred: branch target was mispredicted
- * predicted: branch target was predicted
- *
- * support for mispred, predicted is optional. In case it
- * is not supported mispred = predicted = 0.
- */
-struct perf_branch_entry {
-	__u64	from;
-	__u64	to;
-	__u64	mispred:1,  /* target mispredicted */
-		predicted:1,/* target predicted */
-		reserved:62;
-};
-
-/*
  * branch stack layout:
  *  nr: number of taken branches stored in entries[]
  *
@@ -96,11 +80,6 @@
 	struct perf_branch_entry	entries[0];
 };
 
-struct perf_regs_user {
-	__u64		abi;
-	struct pt_regs	*regs;
-};
-
 struct task_struct;
 
 /*
@@ -136,10 +115,20 @@
 			struct hrtimer	hrtimer;
 		};
 		struct { /* tracepoint */
-			struct task_struct	*tp_target;
 			/* for tp_event->class */
 			struct list_head	tp_list;
 		};
+		struct { /* intel_cqm */
+			int			cqm_state;
+			u32			cqm_rmid;
+			int			is_group_event;
+			struct list_head	cqm_events_entry;
+			struct list_head	cqm_groups_entry;
+			struct list_head	cqm_group_entry;
+		};
+		struct { /* itrace */
+			int			itrace_started;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
 			/*
@@ -147,38 +136,82 @@
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
-			struct task_struct		*bp_target;
 			struct arch_hw_breakpoint	info;
 			struct list_head		bp_list;
 		};
 #endif
 	};
+	/*
+	 * If the event is a per task event, this will point to the task in
+	 * question. See the comment in perf_event_alloc().
+	 */
+	struct task_struct		*target;
+
+/*
+ * hw_perf_event::state flags; used to track the PERF_EF_* state.
+ */
+#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
+#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
+#define PERF_HES_ARCH		0x04
+
 	int				state;
+
+	/*
+	 * The last observed hardware counter value, updated with a
+	 * local64_cmpxchg() such that pmu::read() can be called nested.
+	 */
 	local64_t			prev_count;
+
+	/*
+	 * The period to start the next sample with.
+	 */
 	u64				sample_period;
+
+	/*
+	 * The period we started this sample with.
+	 */
 	u64				last_period;
+
+	/*
+	 * However much is left of the current period; note that this is
+	 * a full 64bit value and allows for generation of periods longer
+	 * than hardware might allow.
+	 */
 	local64_t			period_left;
+
+	/*
+	 * State for throttling the event, see __perf_event_overflow() and
+	 * perf_adjust_freq_unthr_context().
+	 */
 	u64				interrupts_seq;
 	u64				interrupts;
 
+	/*
+	 * State for freq target events, see __perf_event_overflow() and
+	 * perf_adjust_freq_unthr_context().
+	 */
 	u64				freq_time_stamp;
 	u64				freq_count_stamp;
 #endif
 };
 
-/*
- * hw_perf_event::state flags
- */
-#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
-#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
-#define PERF_HES_ARCH		0x04
-
 struct perf_event;
 
 /*
  * Common implementation detail of pmu::{start,commit,cancel}_txn
  */
-#define PERF_EVENT_TXN 0x1
+#define PERF_PMU_TXN_ADD  0x1		/* txn to add/schedule event on PMU */
+#define PERF_PMU_TXN_READ 0x2		/* txn to read event group from PMU */
+
+/**
+ * pmu::capabilities flags
+ */
+#define PERF_PMU_CAP_NO_INTERRUPT		0x01
+#define PERF_PMU_CAP_NO_NMI			0x02
+#define PERF_PMU_CAP_AUX_NO_SG			0x04
+#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
+#define PERF_PMU_CAP_EXCLUSIVE			0x10
+#define PERF_PMU_CAP_ITRACE			0x20
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -186,14 +219,22 @@
 struct pmu {
 	struct list_head		entry;
 
+	struct module			*module;
 	struct device			*dev;
 	const struct attribute_group	**attr_groups;
-	char				*name;
+	const char			*name;
 	int				type;
 
+	/*
+	 * various common per-pmu feature flags
	 */
+	int				capabilities;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
+	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
+	int				hrtimer_interval_ms;
 
 	/*
 	 * Fully disable/enable this PMU, can be used to protect from the PMI
@@ -204,31 +245,84 @@
 
 	/*
 	 * Try and initialize the event for this PMU.
-	 * Should return -ENOENT when the @event doesn't match this PMU.
+	 *
+	 * Returns:
+	 *  -ENOENT	-- @event is not for this PMU
+	 *
+	 *  -ENODEV	-- @event is for this PMU but PMU not present
+	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
+	 *  -EINVAL	-- @event is for this PMU but @event is not valid
+	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
+	 *  -EACCESS	-- @event is for this PMU, @event is valid, but no privilidges
+	 *
+	 *  0		-- @event is for this PMU and valid
+	 *
+	 * Other error return values are allowed.
 	 */
 	int (*event_init)		(struct perf_event *event);
 
+	/*
+	 * Notification that the event was mapped or unmapped. Called
+	 * in the context of the mapping task.
+	 */
+	void (*event_mapped)		(struct perf_event *event); /*optional*/
+	void (*event_unmapped)		(struct perf_event *event); /*optional*/
+
+	/*
+	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
+	 * matching hw_perf_event::state flags.
+	 */
 #define PERF_EF_START	0x01		/* start the counter when adding    */
 #define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
 #define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
 
 	/*
-	 * Adds/Removes a counter to/from the PMU, can be done inside
-	 * a transaction, see the ->*_txn() methods.
+	 * Adds/Removes a counter to/from the PMU, can be done inside a
+	 * transaction, see the ->*_txn() methods.
+	 *
+	 * The add/del callbacks will reserve all hardware resources required
+	 * to service the event, this includes any counter constraint
+	 * scheduling etc.
+	 *
+	 * Called with IRQs disabled and the PMU disabled on the CPU the event
+	 * is on.
+	 *
+	 * ->add() called without PERF_EF_START should result in the same state
+	 *  as ->add() followed by ->stop().
+	 *
+	 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
+	 * ->stop() that must deal with already being stopped without
+	 * PERF_EF_UPDATE.
 	 */
 	int  (*add)			(struct perf_event *event, int flags);
 	void (*del)			(struct perf_event *event, int flags);
 
 	/*
-	 * Starts/Stops a counter present on the PMU. The PMI handler
-	 * should stop the counter when perf_event_overflow() returns
-	 * !0. ->start() will be used to continue.
+	 * Starts/Stops a counter present on the PMU.
+	 *
+	 * The PMI handler should stop the counter when perf_event_overflow()
+	 * returns !0. ->start() will be used to continue.
+	 *
+	 * Also used to change the sample period.
+	 *
+	 * Called with IRQs disabled and the PMU disabled on the CPU the event
+	 * is on -- will be called from NMI context with the PMU generates
+	 * NMIs.
+	 *
+	 * ->stop() with PERF_EF_UPDATE will read the counter and update
+	 * period/count values like ->read() would.
+	 *
+	 * ->start() with PERF_EF_RELOAD will reprogram the the counter
+	 * value, must be preceded by a ->stop() with PERF_EF_UPDATE.
 	 */
 	void (*start)			(struct perf_event *event, int flags);
 	void (*stop)			(struct perf_event *event, int flags);
 
 	/*
 	 * Updates the counter value of the event.
+	 *
+	 * For sampling capable PMUs this will also update the software period
+	 * hw_perf_event::period_left field.
 	 */
 	void (*read)			(struct perf_event *event);
 
@@ -239,20 +333,26 @@
 	 *
 	 * Start the transaction, after this ->add() doesn't need to
 	 * do schedulability tests.
+	 *
+	 * Optional.
 	 */
-	void (*start_txn)		(struct pmu *pmu); /* optional */
+	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
 	/*
 	 * If ->start_txn() disabled the ->add() schedulability test
 	 * then ->commit_txn() is required to perform one. On success
 	 * the transaction is closed. On error the transaction is kept
 	 * open until ->cancel_txn() is called.
+	 *
+	 * Optional.
 	 */
-	int  (*commit_txn)		(struct pmu *pmu); /* optional */
+	int  (*commit_txn)		(struct pmu *pmu);
 	/*
 	 * Will cancel the transaction, assumes ->del() is called
 	 * for each successful ->add() during the transaction.
+	 *
+	 * Optional.
 	 */
-	void (*cancel_txn)		(struct pmu *pmu); /* optional */
+	void (*cancel_txn)		(struct pmu *pmu);
 
 	/*
 	 * Will return the value for perf_event_mmap_page::index for this event,
@@ -261,15 +361,44 @@
 	int (*event_idx)		(struct perf_event *event); /*optional */
 
 	/*
-	 * flush branch stack on context-switches (needed in cpu-wide mode)
+	 * context-switches callback
 	 */
-	void (*flush_branch_stack)	(void);
+	void (*sched_task)		(struct perf_event_context *ctx,
+					bool sched_in);
+	/*
+	 * PMU specific data size
+	 */
+	size_t				task_ctx_size;
+
+
+	/*
+	 * Return the count value for a counter.
+	 */
+	u64 (*count)			(struct perf_event *event); /*optional*/
+
+	/*
+	 * Set up pmu-private data structures for an AUX area
+	 */
+	void *(*setup_aux)		(int cpu, void **pages,
+					 int nr_pages, bool overwrite);
+					/* optional */
+
+	/*
+	 * Free pmu-private AUX data structures
+	 */
+	void (*free_aux)		(void *aux); /* optional */
+
+	/*
+	 * Filter events for PMU-specific reasons.
+	 */
+	int (*filter_match)		(struct perf_event *event); /* optional */
 };
 
 /**
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+	PERF_EVENT_STATE_EXIT		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
@@ -298,6 +427,7 @@
 #define PERF_ATTACH_CONTEXT	0x01
 #define PERF_ATTACH_GROUP	0x02
 #define PERF_ATTACH_TASK	0x04
+#define PERF_ATTACH_TASK_DATA	0x08
 
 struct perf_cgroup;
 struct ring_buffer;
@@ -307,10 +437,33 @@
  */
 struct perf_event {
 #ifdef CONFIG_PERF_EVENTS
-	struct list_head		group_entry;
+	/*
	 * entry onto perf_event_context::event_list;
+	 *   modifications require ctx->lock
+	 *   RCU safe iterations.
+	 */
 	struct list_head		event_entry;
+
+	/*
+	 * XXX: group_entry and sibling_list should be mutually exclusive;
+	 * either you're a sibling on a group, or you're the group leader.
+	 * Rework the code to always use the same list element.
+	 *
+	 * Locked for modification by both ctx->mutex and ctx->lock; holding
+	 * either sufficies for read.
+	 */
+	struct list_head		group_entry;
 	struct list_head		sibling_list;
+
+	/*
+	 * We need storage to track the entries in perf_pmu_migrate_context; we
+	 * cannot use the event_entry because of RCU and we want to keep the
+	 * group in tact which avoids us using the other two entries.
+	 */
+	struct list_head		migrate_entry;
+
 	struct hlist_node		hlist_entry;
+	struct list_head		active_entry;
 	int				nr_siblings;
 	int				group_flags;
 	struct perf_event		*group_leader;
@@ -392,6 +545,8 @@
 
 	struct ring_buffer		*rb;
 	struct list_head		rb_entry;
+	unsigned long			rcu_batches;
+	int				rcu_pending;
 
 	/* poll related */
 	wait_queue_head_t		waitq;
@@ -411,11 +566,12 @@
 	struct pid_namespace		*ns;
 	u64				id;
 
+	u64				(*clock)(void);
 	perf_overflow_handler_t		overflow_handler;
 	void				*overflow_handler_context;
 
 #ifdef CONFIG_EVENT_TRACING
-	struct ftrace_event_call	*tp_event;
+	struct trace_event_call		*tp_event;
 	struct event_filter		*filter;
 #ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops		ftrace_ops;
@@ -449,6 +605,7 @@
 	 */
 	struct mutex			mutex;
 
+	struct list_head		active_ctx_list;
 	struct list_head		pinned_groups;
 	struct list_head		flexible_groups;
 	struct list_head		event_list;
@@ -476,8 +633,11 @@
 	u64				generation;
 	int				pin_count;
 	int				nr_cgroups;	 /* cgroup evts */
-	int				nr_branch_stack; /* branch_stack evt */
+	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
+
+	struct delayed_work		orphans_remove;
+	bool				orphans_remove_sched;
 };
 
 /*
@@ -494,8 +654,12 @@
 	struct perf_event_context	*task_ctx;
 	int				active_oncpu;
 	int				exclusive;
-	struct list_head		rotation_list;
-	int				jiffies_interval;
+
+	raw_spinlock_t			hrtimer_lock;
+	struct hrtimer			hrtimer;
+	ktime_t				hrtimer_interval;
+	unsigned int			hrtimer_active;
+
 	struct pmu			*unique_pmu;
 	struct perf_cgroup		*cgrp;
 };
@@ -505,13 +669,55 @@
 	struct ring_buffer		*rb;
 	unsigned long			wakeup;
 	unsigned long			size;
-	void				*addr;
+	union {
+		void			*addr;
+		unsigned long		head;
+	};
 	int				page;
 };
 
+#ifdef CONFIG_CGROUP_PERF
+
+/*
+ * perf_cgroup_info keeps track of time_enabled for a cgroup.
+ * This is a per-cpu dynamically allocated data structure.
+ */
+struct perf_cgroup_info {
+	u64				time;
+	u64				timestamp;
+};
+
+struct perf_cgroup {
+	struct cgroup_subsys_state	css;
+	struct perf_cgroup_info	__percpu *info;
+};
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ */
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
+{
+	return container_of(task_css_check(task, perf_event_cgrp_id,
+					    ctx ? lockdep_is_held(&ctx->lock)
+					        : true),
+			    struct perf_cgroup, css);
+}
+#endif /* CONFIG_CGROUP_PERF */
+
 #ifdef CONFIG_PERF_EVENTS
 
-extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
+extern void *perf_aux_output_begin(struct perf_output_handle *handle,
+				   struct perf_event *event);
+extern void perf_aux_output_end(struct perf_output_handle *handle,
				unsigned long size, bool truncated);
+extern int perf_aux_output_skip(struct perf_output_handle *handle,
+				unsigned long size);
+extern void *perf_get_aux(struct perf_output_handle *handle);
+
+extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
@@ -524,9 +730,13 @@
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
+extern struct perf_event *perf_event_get(unsigned int fd);
+extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
+extern void perf_sched_cb_dec(struct pmu *pmu);
+extern void perf_sched_cb_inc(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
@@ -540,35 +750,60 @@
 				void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
+extern u64 perf_event_read_local(struct perf_event *event);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
 
 struct perf_sample_data {
-	u64				type;
+	/*
+	 * Fields set by perf_sample_data_init(), group so as to
+	 * minimize the cachelines touched.
+	 */
+	u64				addr;
+	struct perf_raw_record		*raw;
+	struct perf_branch_stack	*br_stack;
+	u64				period;
+	u64				weight;
+	u64				txn;
+	union  perf_mem_data_src	data_src;
 
+	/*
+	 * The other fields, optionally {set,used} by
+	 * perf_{prepare,output}_sample().
+	 */
+	u64				type;
 	u64				ip;
 	struct {
 		u32	pid;
 		u32	tid;
 	}				tid_entry;
 	u64				time;
-	u64				addr;
 	u64				id;
 	u64				stream_id;
 	struct {
 		u32	cpu;
 		u32	reserved;
 	}				cpu_entry;
-	u64				period;
-	union  perf_mem_data_src	data_src;
 	struct perf_callchain_entry	*callchain;
-	struct perf_raw_record		*raw;
-	struct perf_branch_stack	*br_stack;
-	struct perf_regs_user		regs_user;
+
+	/*
+	 * regs_user may point to task_pt_regs or to regs_user_copy, depending
+	 * on arch details.
+	 */
+	struct perf_regs		regs_user;
+	struct pt_regs			regs_user_copy;
+
+	struct perf_regs		regs_intr;
 	u64				stack_user_size;
-	u64				weight;
-};
+} ____cacheline_aligned;
+
+/* default value for data source */
+#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
+		    PERF_MEM_S(LVL, NA)   |\
+		    PERF_MEM_S(SNOOP, NA) |\
+		    PERF_MEM_S(LOCK, NA)  |\
+		    PERF_MEM_S(TLB, NA))
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
 					 u64 addr, u64 period)
@@ -578,11 +813,9 @@
 	data->raw  = NULL;
 	data->br_stack = NULL;
 	data->period = period;
-	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
-	data->regs_user.regs = NULL;
-	data->stack_user_size = 0;
 	data->weight = 0;
-	data->data_src.val = 0;
+	data->data_src.val = PERF_MEM_NA;
+	data->txn = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
@@ -598,6 +831,22 @@
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+extern void perf_event_output(struct perf_event *event,
+				struct perf_sample_data *data,
+				struct pt_regs *regs);
+
+extern void
+perf_event_header__init_id(struct perf_event_header *header,
+			   struct perf_sample_data *data,
+			   struct perf_event *event);
+extern void
+perf_event__output_id_sample(struct perf_event *event,
+			     struct perf_output_handle *handle,
+			     struct perf_sample_data *sample);
+
+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
@@ -613,6 +862,7 @@
 
 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
+extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
 extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
 
 #ifndef perf_arch_fetch_caller_regs
@@ -637,41 +887,80 @@
 static __always_inline void
 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
-	struct pt_regs hot_regs;
+	if (static_key_false(&perf_swevent_enabled[event_id]))
+		__perf_sw_event(event_id, nr, regs, addr);
+}
+
+DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
 
+/*
+ * 'Special' version for the scheduler, it hard assumes no recursion,
+ * which is guaranteed by us not actually scheduling inside other swevents
+ * because those disable preemption.
+ */
+static __always_inline void
+perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+{
 	if (static_key_false(&perf_swevent_enabled[event_id])) {
-		if (!regs) {
-			perf_fetch_caller_regs(&hot_regs);
-			regs = &hot_regs;
-		}
-		__perf_sw_event(event_id, nr, regs, addr);
+		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+		perf_fetch_caller_regs(regs);
+		___perf_sw_event(event_id, nr, regs, addr);
 	}
 }
 
 extern struct static_key_deferred perf_sched_events;
 
+static __always_inline bool
+perf_sw_migrate_enabled(void)
+{
+	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
+		return true;
+	return false;
+}
+
+static inline void perf_event_task_migrate(struct task_struct *task)
+{
+	if (perf_sw_migrate_enabled())
+		task->sched_migrated = 1;
+}
+
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
 	if (static_key_false(&perf_sched_events.key))
 		__perf_event_task_sched_in(prev, task);
+
+	if (perf_sw_migrate_enabled() && task->sched_migrated) {
+		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+		perf_fetch_caller_regs(regs);
+		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+		task->sched_migrated = 0;
+	}
 }
 
 static inline void perf_event_task_sched_out(struct task_struct *prev,
 					     struct task_struct *next)
 {
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
+	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
 	if (static_key_false(&perf_sched_events.key))
 		__perf_event_task_sched_out(prev, next);
 }
 
+static inline u64 __perf_event_count(struct perf_event *event)
+{
+	return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
-extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_exec(void);
+extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -734,6 +1023,16 @@
 	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
 }
 
+static inline bool needs_branch_stack(struct perf_event *event)
+{
+	return event->attr.branch_sample_type != 0;
+}
+
+static inline bool has_aux(struct perf_event *event)
+{
+	return event->pmu->setup_aux;
+}
+
 extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
@@ -743,11 +1042,25 @@
 				     unsigned int len);
 extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
+extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
-#else
+#else /* !CONFIG_PERF_EVENTS: */
+static inline void *
+perf_aux_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event)				{ return NULL; }
+static inline void
+perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
+		    bool truncated)					{ }
+static inline int
+perf_aux_output_skip(struct perf_output_handle *handle,
+		     unsigned long size)				{ return -EINVAL; }
+static inline void *
+perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
+static inline void
+perf_event_task_migrate(struct task_struct *task)			{ }
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
@@ -758,6 +1071,12 @@
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
+static inline struct perf_event *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
+static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline u64 perf_event_read_local(struct perf_event *event)	{ return -EINVAL; }
 static inline void perf_event_print_debug(void)			{ }
 static inline int perf_event_task_disable(void)			{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
@@ -769,6 +1088,8 @@
 static inline void
 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 static inline void
+perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)			{ }
+static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline int perf_register_guest_info_callbacks
@@ -777,15 +1098,18 @@
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
-static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_exec(void)				{ }
+static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)	{ }
+static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)	{ }
 static inline int __perf_event_disable(void *info)			{ return -1; }
 static inline void perf_event_task_tick(void)				{ }
+static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
@@ -803,14 +1127,16 @@
 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 
 /*
- * This has to have a higher priority than migration_notifier in sched.c.
+ * This has to have a higher priority than migration_notifier in sched/core.c.
 */
 #define perf_cpu_notifier(fn)						\
 do {									\
-	static struct notifier_block fn##_nb __cpuinitdata =		\
+	static struct notifier_block fn##_nb =				\
 		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
 	unsigned long cpu = smp_processor_id();				\
 	unsigned long flags;						\
+									\
+	cpu_notifier_register_begin();					\
 	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
 		(void *)(unsigned long)cpu);				\
 	local_irq_save(flags);						\
@@ -819,9 +1145,21 @@
 	local_irq_restore(flags);					\
 	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
 		(void *)(unsigned long)cpu);				\
-	register_cpu_notifier(&fn##_nb);				\
+	__register_cpu_notifier(&fn##_nb);				\
+	cpu_notifier_register_done();					\
 } while (0)
 
+/*
+ * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
+ * callback for already online CPUs.
+ */
+#define __perf_cpu_notifier(fn)						\
+do {									\
+	static struct notifier_block fn##_nb =				\
+		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
+									\
+	__register_cpu_notifier(&fn##_nb);				\
+} while (0)
 
 struct perf_pmu_events_attr {
 	struct device_attribute attr;
@@ -829,12 +1167,22 @@
 	const char *event_str;
 };
 
+ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
+			      char *page);
+
 #define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
 static struct perf_pmu_events_attr _var = {				\
 	.attr = __ATTR(_name, 0444, _show, NULL),			\
 	.id   = _id,							\
 };
 
+#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			\
+static struct perf_pmu_events_attr _var = {				\
+	.attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),	\
+	.id = 0,							\
+	.event_str = _str,						\
+};
+
 #define PMU_FORMAT_ATTR(_name, _format)					\
 static ssize_t								\
 _name##_show(struct device *dev,					\