--- zzzz-none-000/linux-3.10.107/Documentation/trace/ftrace.txt 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/Documentation/trace/ftrace.txt 2021-02-04 17:41:59.000000000 +0000 @@ -108,8 +108,8 @@ data is read from this file, it is consumed, and will not be read again with a sequential read. The "trace" file is static, and if the tracer is not - adding more data,they will display the same - information every time they are read. + adding more data, it will display the same + information every time it is read. trace_options: @@ -204,6 +204,12 @@ Have the function tracer only trace a single thread. + set_event_pid: + + Have the events only trace a task with a PID listed in this file. + Note, sched_switch and sched_wake_up will also trace events + listed in this file. + set_graph_function: Set a "trigger" function where tracing should start @@ -234,6 +240,11 @@ will be displayed on the same line as the function that is returning registers. + If the callback registered to be traced by a function with + the "ip modify" attribute (thus the regs->ip can be changed), + an 'I' will be displayed on the same line as the function that + can be overridden. + function_profile_enabled: When set it will enable all functions with either the function @@ -341,6 +352,11 @@ x86-tsc: Architectures may define their own clocks. For example, x86 uses its own TSC cycle clock here. + ppc-tb: This uses the powerpc timebase register value. + This is in sync across CPUs and can also be used + to correlate events across hypervisor/guest if + tb_offset is known. + To set a clock, simply echo the clock name into this file. echo global > trace_clock @@ -655,7 +671,11 @@ read the irq flags variable, an 'X' will always be printed here. - need-resched: 'N' task need_resched is set, '.' otherwise. + need-resched: + 'N' both TIF_NEED_RESCHED and PREEMPT_NEED_RESCHED is set, + 'n' only TIF_NEED_RESCHED is set, + 'p' only PREEMPT_NEED_RESCHED is set, + '.' otherwise. hardirq/softirq: 'H' - hard irq occurred inside a softirq. @@ -676,9 +696,13 @@ needs to be fixed to be only relative to the same CPU. The marks are determined by the difference between this current trace and the next trace. - '!' - greater than preempt_mark_thresh (default 100) - '+' - greater than 1 microsecond - ' ' - less than or equal to 1 microsecond. + '$' - greater than 1 second + '@' - greater than 100 milisecond + '*' - greater than 10 milisecond + '#' - greater than 1000 microsecond + '!' - greater than 100 microsecond + '+' - greater than 10 microsecond + ' ' - less than or equal to 10 microsecond. The rest is the same as the 'trace' file. @@ -735,7 +759,7 @@ function as well as the function being traced. print-parent: - bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul + bash-4000 [01] 1477.606694: simple_strtoul <-kstrtoul noprint-parent: bash-4000 [01] 1477.606694: simple_strtoul @@ -759,7 +783,7 @@ latency-format option is enabled. bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ - (+0.000ms): simple_strtoul (strict_strtoul) + (+0.000ms): simple_strtoul (kstrtoul) raw - This will display raw numbers. This option is best for use with user applications that can translate the raw @@ -1511,7 +1535,7 @@ -0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep -0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up -0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup - -0 3d.h3 3us : resched_task <-check_preempt_curr + -0 3d.h3 3us : resched_curr <-check_preempt_curr -0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup -0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up -0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock @@ -1729,7 +1753,7 @@ yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll -# echo -1 > set_ftrace_pid +# echo > set_ftrace_pid # cat trace |head # tracer: function # @@ -1928,25 +1952,50 @@ ie: - 0) | up_write() { - 0) 0.646 us | _spin_lock_irqsave(); - 0) 0.684 us | _spin_unlock_irqrestore(); - 0) 3.123 us | } - 0) 0.548 us | fput(); - 0) + 58.628 us | } + 3) # 1837.709 us | } /* __switch_to */ + 3) | finish_task_switch() { + 3) 0.313 us | _raw_spin_unlock_irq(); + 3) 3.177 us | } + 3) # 1889.063 us | } /* __schedule */ + 3) ! 140.417 us | } /* __schedule */ + 3) # 2034.948 us | } /* schedule */ + 3) * 33998.59 us | } /* schedule_preempt_disabled */ [...] - 0) | putname() { - 0) | kmem_cache_free() { - 0) 0.518 us | __phys_addr(); - 0) 1.757 us | } - 0) 2.861 us | } - 0) ! 115.305 us | } - 0) ! 116.402 us | } + 1) 0.260 us | msecs_to_jiffies(); + 1) 0.313 us | __rcu_read_unlock(); + 1) + 61.770 us | } + 1) + 64.479 us | } + 1) 0.313 us | rcu_bh_qs(); + 1) 0.313 us | __local_bh_enable(); + 1) ! 217.240 us | } + 1) 0.365 us | idle_cpu(); + 1) | rcu_irq_exit() { + 1) 0.417 us | rcu_eqs_enter_common.isra.47(); + 1) 3.125 us | } + 1) ! 227.812 us | } + 1) ! 457.395 us | } + 1) @ 119760.2 us | } + + [...] + + 2) | handle_IPI() { + 1) 6.979 us | } + 2) 0.417 us | scheduler_ipi(); + 1) 9.791 us | } + 1) + 12.917 us | } + 2) 3.490 us | } + 1) + 15.729 us | } + 1) + 18.542 us | } + 2) $ 3594274 us | } + means that the function exceeded 10 usecs. ! means that the function exceeded 100 usecs. + # means that the function exceeded 1000 usecs. + * means that the function exceeded 10 msecs. + @ means that the function exceeded 100 msecs. + $ means that the function exceeded 1 sec. - The task/pid field displays the thread cmdline and pid which @@ -1999,6 +2048,32 @@ 360.774530 | 1) 0.594 us | __phys_addr(); +The function name is always displayed after the closing bracket +for a function if the start of that function is not in the +trace buffer. + +Display of the function name after the closing bracket may be +enabled for functions whose start is in the trace buffer, +allowing easier searching with grep for function durations. +It is default disabled. + + hide: echo nofuncgraph-tail > trace_options + show: echo funcgraph-tail > trace_options + + Example with nofuncgraph-tail (default): + 0) | putname() { + 0) | kmem_cache_free() { + 0) 0.518 us | __phys_addr(); + 0) 1.757 us | } + 0) 2.861 us | } + + Example with funcgraph-tail: + 0) | putname() { + 0) | kmem_cache_free() { + 0) 0.518 us | __phys_addr(); + 0) 1.757 us | } /* kmem_cache_free() */ + 0) 2.861 us | } /* putname() */ + You can put some comments on specific functions by using trace_printk() For example, if you want to put a comment inside the __might_sleep() function, you just have to include @@ -2368,6 +2443,23 @@ echo '!writeback*:mod:ext3' >> set_ftrace_filter + Mod command supports module globbing. Disable tracing for all + functions except a specific module: + + echo '!*:mod:!ext3' >> set_ftrace_filter + + Disable tracing for all modules, but still trace kernel: + + echo '!*:mod:*' >> set_ftrace_filter + + Enable filter only for kernel: + + echo '*write*:mod:!*' >> set_ftrace_filter + + Enable filter for module globbing: + + echo '*write*:mod:*snd*' >> set_ftrace_filter + - traceon/traceoff These commands turn tracing on and off when the specified functions are hit. The parameter determines how many times the @@ -2430,6 +2522,19 @@ echo '!schedule:disable_event:sched:sched_switch' > \ set_ftrace_filter +- dump + When the function is hit, it will dump the contents of the ftrace + ring buffer to the console. This is useful if you need to debug + something, and want to dump the trace when a certain function + is hit. Perhaps its a function that is called before a tripple + fault happens and does not allow you to get a regular dump. + +- cpudump + When the function is hit, it will dump the contents of the ftrace + ring buffer for the current CPU to the console. Unlike the "dump" + command, it only prints out the contents of the ring buffer for the + CPU that executed the function that triggered the dump. + trace_pipe ----------