Changes between Initial Version and Version 1 of linux/profiling


Ignore:
Timestamp:
10/31/2017 09:27:40 PM (5 years ago)
Author:
Tim Harvey
Comment:

restored html from 2017/9/14 cache

Legend:

Unmodified
Added
Removed
Modified
  • linux/profiling

    v1 v1  
     1{{{#!html
     2
     3 <head>
     4    <title>
     5      linux/profiling – Gateworks
     6    </title>
     7      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
     8      <meta http-equiv="X-UA-Compatible" content="IE=edge" />
     9    <!--[if IE]><script type="text/javascript">
     10      if (/^#__msie303:/.test(window.location.hash))
     11        window.location.replace(window.location.hash.replace(/^#__msie303:/, '#'));
     12    </script><![endif]-->
     13        <link rel="search" href="/search" />
     14        <link rel="help" href="/wiki/TracGuide" />
     15        <link rel="alternate" href="/wiki/linux/profiling?format=txt" type="text/x-trac-wiki" title="Plain Text" />
     16        <link rel="up" href="/wiki/linux" title="View parent page" />
     17        <link rel="start" href="/wiki" />
     18        <link rel="stylesheet" href="/chrome/common/css/trac.css" type="text/css" /><link rel="stylesheet" href="/chrome/common/css/wiki.css" type="text/css" />
     19        <link rel="shortcut icon" href="/chrome/common/trac.ico" type="image/x-icon" />
     20        <link rel="icon" href="/chrome/common/trac.ico" type="image/x-icon" />
     21      <link type="application/opensearchdescription+xml" rel="search" href="/search/opensearch" title="Search Gateworks" />
     22      <script type="text/javascript" charset="utf-8" src="/chrome/common/js/jquery.js"></script>
     23      <script type="text/javascript" charset="utf-8" src="/chrome/common/js/babel.js"></script>
     24      <script type="text/javascript" charset="utf-8" src="/chrome/common/js/trac.js"></script>
     25      <script type="text/javascript" charset="utf-8" src="/chrome/common/js/search.js"></script>
     26      <script type="text/javascript" charset="utf-8" src="/chrome/common/js/folding.js"></script>
     27    <script type="text/javascript">
     28      jQuery(document).ready(function($) {
     29        $("#content").find("h1,h2,h3,h4,h5,h6").addAnchor(_("Link to this section"));
     30        $("#content").find(".wikianchor").each(function() {
     31          $(this).addAnchor(babel.format(_("Link to #%(id)s"), {id: $(this).attr('id')}));
     32        });
     33        $(".foldable").enableFolding(true, true);
     34      });
     35    </script>
     36  </head>
     37  <body>
     38    <div id="banner">
     39      <div id="header">
     40        <a id="logo" href="http://trac.gateworks.com"><img src="/chrome/site/g2998.png" alt="" /></a>
     41      </div>
     42      <form id="search" action="/search" method="get">
     43        <div>
     44          <label for="proj-search">Search:</label>
     45          <input type="text" id="proj-search" name="q" size="18" value="" />
     46          <input type="submit" value="Search" />
     47        </div>
     48      </form>
     49      <div id="metanav" class="nav">
     50    <ul>
     51      <li class="first"><a href="/login">Login</a></li><li><a href="/wiki/TracGuide">Help/Guide</a></li><li><a href="/about">About Trac</a></li><li><a href="/prefs">Preferences</a></li><li class="last"><a href="/register">Register</a></li>
     52    </ul>
     53  </div>
     54    </div>
     55    <div id="mainnav" class="nav">
     56    <ul>
     57      <li class="first active"><a href="/wiki">Wiki</a></li><li><a href="/timeline">Timeline</a></li><li><a href="/browser">Browse Source</a></li><li class="last"><a href="/search">Search</a></li>
     58    </ul>
     59  </div>
     60    <div id="main">
     61      <div id="pagepath" class="noprint">
     62  <a class="pathentry first" title="View WikiStart" href="/wiki">wiki:</a><a class="pathentry" href="/wiki/linux" title="View linux"><b style="color:#000;background:#ffff66">linux</b></a><span class="pathentry sep">/</span><a class="pathentry" href="/wiki/linux/profiling" title="View linux/profiling"><b style="color:#000;background:#66ffff">profiling</b></a>
     63</div>
     64      <div id="ctxtnav" class="nav">
     65        <h2>Context Navigation</h2>
     66        <ul>
     67          <li class="first"><a href="/wiki/linux">Up</a></li><li><a href="/wiki/WikiStart">Start Page</a></li><li><a href="/wiki/TitleIndex">Index</a></li><li class="last"><a href="/wiki/linux/profiling?action=history">History</a></li>
     68        </ul>
     69        <hr />
     70      </div>
     71    <div id="content" class="wiki">
     72      <div class="wikipage searchable">
     73       
     74          <div id="wikipage" class="trac-content"><p>
     75</p><div class="wiki-toc">
     76<ol>
     77  <li>
     78    <a href="#LinuxOSCodeProfiling"><b style="color:#000;background:#ffcc99">Linux OS Code Profiling</b></a>
     79    <ol>
     80      <li>
     81        <a href="#BasicKernelProfilingCONFIG_PROFILINGandreadprofile">Basic Kernel <b style="color:#000;background:#66ffff">Profiling</b> (CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b>  and readprofile)</a>
     82      </li>
     83      <li>
     84        <a href="#OProfile">OProfile</a>
     85        <ol>
     86          <li>
     87            <a href="#OProfileStandardModeimx6">OProfile Standard Mode (imx6)</a>
     88          </li>
     89          <li>
     90            <a href="#OProfileLegacyModecns3xxx">OProfile Legacy Mode (cns3xxx)</a>
     91          </li>
     92        </ol>
     93      </li>
     94      <li>
     95        <a href="#Perf">Perf</a>
     96      </li>
     97      <li>
     98        <a href="#OpenWrt">OpenWrt</a>
     99      </li>
     100    </ol>
     101  </li>
     102</ol>
     103</div><p>
     104</p>
     105<h1 id="LinuxOSCodeProfiling"><b style="color:#000;background:#ffcc99">Linux OS Code Profiling</b></h1>
     106<p>
     107There are several options for code <b style="color:#000;background:#66ffff">profiling</b> on the <b style="color:#000;background:#ffff66">Linux</b> OS. The kernel itself has a <b style="color:#000;background:#66ffff">profiling</b> API which can be enabled:
     108</p>
     109<ul><li>CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b> - General <b style="color:#000;background:#66ffff">profiling</b>
     110</li><li>CONFIG_OPROFILE - OProfile system <b style="color:#000;background:#66ffff">profiling</b> (capable of <b style="color:#000;background:#66ffff">profiling</b> the whole system including kernel, kernel modules, libraries, and applications)
     111</li></ul><p>
     112OProfile was the <b style="color:#000;background:#66ffff">profiling</b> tool of choice for <b style="color:#000;background:#ffff66">linux</b> developers for nearly 10 years. A few years back various kernel developers defined and implemented a new formal kernel API to access performance monitor counters (PMC's), which are hardware elements in most modern CPU's, to address needs of performance tools. Prior to this new API OProfile used a special OProfile-specific kernel module while other tools relied on patches (perfctr, perfmon).
     113</p>
     114<p>
     115The developers of the new <b style="color:#000;background:#66ffff">profiling</b> API also developed an example tool that used the new API called 'perf'. The perf tool has thus matured greatly in the past few years. OProfile is strictly a <b style="color:#000;background:#66ffff">profiling</b> tool.
     116</p>
     117<p>
     118There are other options that are not described here:
     119</p>
     120<ul><li>valgrind / cachegrind / dtrace
     121</li><li>Google CPU <b style="color:#000;background:#66ffff">profiler</b>
     122</li><li>gprof
     123</li></ul><p>
     124Reference:
     125</p>
     126<ul><li><a class="ext-link" href="http://rhaas.blogspot.co.uk/2012/06/perf-good-bad-ugly.html"><span class="icon">​</span>http://rhaas.blogspot.co.uk/2012/06/perf-good-bad-ugly.html</a>
     127</li><li><a class="ext-link" href="http://homepages.cwi.nl/~aeb/linux/profile.html"><span class="icon">​</span>http://homepages.cwi.nl/~aeb/<b style="color:#000;background:#ffff66">linux</b>/profile.html</a>
     128</li></ul><h2 id="BasicKernelProfilingCONFIG_PROFILINGandreadprofile">Basic Kernel <b style="color:#000;background:#66ffff">Profiling</b> (CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b>  and readprofile)</h2>
     129<p>
     130There are several facilities to see where the kernel spends its resources. A simple one which can be built-in with (CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b>) will store the current EIP (instruction pointer) at each clock tick.
     131</p>
     132<p>
     133To use this ensure the kernel is built with CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b> and either boot the kernel with command line option <strong>profile=2</strong> or enable at runtime with an <strong>echo 2 &gt; /sys/kernel/<b style="color:#000;background:#66ffff">profiling</b></strong>.
     134</p>
     135<p>
     136This will cause a file /proc/profile to be created. The number provided (2 in the example above) is the number of positions EIP is shifted right when <b style="color:#000;background:#66ffff">profiling</b>. So a large number gives a coarse profile. The counters are reset by writing to /proc/profile.
     137</p>
     138<p>
     139The utility readprofile will output statistics for you. It does not sort so you have to invoke sort explicitly. But given a memory map it will translate addresses to kernel symbols.
     140</p>
     141<p>
     142Example:
     143</p>
     144<ol><li>boot kernel compiled with CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b>
     145</li><li>enable (either with placing <strong>profile=2</strong> on cmdline or dynamically with:
     146<pre class="wiki">echo 2 &gt; /sys/kernel/<b style="color:#000;background:#66ffff">profiling</b> # enable <b style="color:#000;background:#66ffff">profiling</b>
     147</pre></li><li>(optional) clear counters
     148<pre class="wiki">echo &gt; /proc/profile # reset counters
     149</pre></li><li>do some activity you wish to profile
     150</li><li>use <strong>readprofile</strong> to interpret the results:
     151<pre class="wiki">readprofile -m System.map | sort -nr | head -2
     152510502 total                                      0.1534
     153508548 default_idle                           10594.7500
     154</pre></li></ol><ul><li>The first column gives the number of timer ticks. The last column gives the number of ticks divided by the size of the function.
     155</li><li>The command readprofile -r is equivalent to echo &gt; /proc/profile.
     156</li></ul><p>
     157References:
     158</p>
     159<ul><li><a class="ext-link" href="http://lxr.missinglinkelectronics.com/linux/Documentation/basic_profiling.txt"><span class="icon">​</span>http://lxr.missinglinkelectronics.com/<b style="color:#000;background:#ffff66">linux</b>/Documentation/basic_<b style="color:#000;background:#66ffff">profiling</b>.txt</a>
     160</li><li><a class="ext-link" href="http://homepages.cwi.nl/~aeb/linux/profile.html"><span class="icon">​</span>http://homepages.cwi.nl/~aeb/<b style="color:#000;background:#ffff66">linux</b>/profile.html</a>
     161</li><li>See <a class="ext-link" href="http://lxr.missinglinkelectronics.com/linux/kernel/profile.c"><span class="icon">​</span>kernel/profile.c</a> and <a class="ext-link" href="http://lxr.missinglinkelectronics.com/linux/fs/proc/proc_misc.c"><span class="icon">​</span>fs/proc/proc_misc.c</a> and <a class="ext-link" href="http://techpubs.sgi.com/library/tpl/cgi-bin/getdoc.cgi?coll=linux&amp;db=man&amp;fname=/usr/share/catman/man1/readprofile.1.html"><span class="icon">​</span>readprofile(1)</a>.
     162</li></ul><h2 id="OProfile">OProfile</h2>
     163<p>
     164OProfile provides a <b style="color:#000;background:#66ffff">profiler</b> and post-processing tools for analyzing profile data, event counter.
     165</p>
     166<p>
     167The tool used is called <strong>operf</strong>. Some processors are not supported by the underlying new perf_events kernel API and thus not supported by operf. If you see <strong>Your kernel's Performance Events Subsystem does not support your processor type</strong> then you need to try and use opcontrol for the legacy mode.
     168</p>
     169<p>
     170References:
     171</p>
     172<ul><li><a class="ext-link" href="http://oprofile.sourceforge.net/"><span class="icon">​</span>http://oprofile.sourceforge.net/</a>
     173</li><li><a class="ext-link" href="http://oprofile.sourceforge.net/doc/index.html"><span class="icon">​</span>http://oprofile.sourceforge.net/doc/index.html</a>
     174</li></ul><h3 id="OProfileStandardModeimx6">OProfile Standard Mode (imx6)</h3>
     175<p>
     176Starting with v0.9.8, OProfile switched over to using the new perf_events kernel API with a new set of userspace tools (however OProfile still supports the legacy mode - see below).
     177</p>
     178<p>
     179Standard mode tools:
     180</p>
     181<ul><li>operf -
     182</li><li>ocount - collect raw event counts on a per-app, per-process, per-cpu, or system-wide basis
     183</li></ul><p>
     184Using the standard mode, post-processing of collected raw events is not necessary.
     185</p>
     186<h3 id="OProfileLegacyModecns3xxx">OProfile Legacy Mode (cns3xxx)</h3>
     187<p>
     188The <strong>legacy mode</strong> is for CPU's that do not implement the new perf_events kernel <b style="color:#000;background:#66ffff">profiling</b> API. The Gateworks Laguna family using the Cavium cns3xxx CPU falls into this category.
     189</p>
     190<p>
     191The legacy mode tools consist of:
     192</p>
     193<ul><li>oprofile kernel module (requires CONFIG_<b style="color:#000;background:#66ffff">PROFILING</b>=y and CONFIG_OPROFILE=m)
     194</li><li>opcontrol - used to setup <b style="color:#000;background:#66ffff">profiling</b> (need vmlinux file)
     195</li><li>oprofiled - the daemon (controlled via opcontrol)
     196</li><li>opreport - report on collected samples
     197</li></ul><p>
     198opcontrol parameters:
     199</p>
     200<ul><li>--session-dir specifies the location to store samples. It defaults to /var/lib/oprofile and you can use this (with both opcontrol and opreport) to use samples from alternate locations
     201</li><li>--separate specifies how to separate samples. By default they are all stored in a single file (none), but you can choose to store by:
     202<ul><li>none - no profile separation (default)
     203</li><li>lib - per-application profiles for libraries
     204</li><li>kernel - per-application profiles for the kernel and kernel modules
     205</li><li>thread - profiles for each thread and each task
     206</li><li>cpu - profiles for each CPU
     207</li><li>all - all of the above
     208</li></ul></li><li>Using <strong>profile specification parameters</strong> you can choose how to sample and report data:
     209<ul><li>cpu:0 - report just cpu0 (assuming data was collected separately (see above))
     210</li></ul></li><li>--vmlinux=file (both for opcontrol and opreport) specifies the vmlinux kernel image required for decoding kernel symbols
     211</li><li>--setup will store the following list of parameters in /root/.oprofile/daemonrc to be used as default settings for opcontrol and opreport. Alternatively you can specify setup options to each program as needed
     212</li></ul><p>
     213Example usage:
     214</p>
     215<ol><li>copy your current kernel's vmlinux to /tmp
     216</li><li>(optional) setup our configuration for vmlinux symbol decoding, specific session location, and separating events by cpu:
     217<pre class="wiki">opcontrol --setup --vmlinux=/tmp/vmlinux --session-dir=/tmp/session1 --separate=cpu
     218</pre></li><li>start capturing events:
     219<pre class="wiki">opcontrol --start
     220</pre><ul><li>you can force a flush of collected events via <strong>opcontrol --dump</strong> at any time
     221</li><li>you can clear out current collected events via <strong>opcontrol --reset</strong> at any time
     222</li></ul></li><li>stop capturing events (and flush data):
     223<pre class="wiki">opcontrol --shutdown
     224</pre></li><li>report events:
     225<pre class="wiki">opreport --vmlinux=/tmp/vmlinux --session-dir=/tmp/session1
     226</pre><ul><li>if capturing events from individual cpu's separately (as shown above) you can show the info for just cpu0 via <strong>opreport cpu:0</strong>
     227</li><li>Note that opreport doesn't make use of the conf file generated by opcontrol --setup
     228</li></ul></li></ol><p>
     229Important notes:
     230</p>
     231<ul><li>Because the cns3xxx kernel and/or hardware does not support a performance counter, we are forced into timer-based mode using the timer irq. In this mode <b style="color:#000;background:#66ffff">profiling</b> is not useful when using code that disables irqs or runs in hardirq context
     232</li></ul><p>
     233References:
     234</p>
     235<ul><li><a class="ext-link" href="http://oprofile.sourceforge.net/doc/controlling-daemon.html"><span class="icon">​</span>http://oprofile.sourceforge.net/doc/controlling-daemon.html</a>
     236</li><li><a class="ext-link" href="http://oprofile.sourceforge.net/doc/getting-started-with-legacy.html"><span class="icon">​</span>http://oprofile.sourceforge.net/doc/getting-started-with-legacy.html</a>
     237</li></ul><h2 id="Perf">Perf</h2>
     238<p>
     239In general <b style="color:#000;background:#66ffff">profiling</b> with the <strong>perf</strong> tool is considered easier to install and run.
     240</p>
     241<p>
     242Example:
     243</p>
     244<ol><li>(optional) copy your current kernel's vmlinux to /tmp
     245</li><li>capture 120 seconds worth of <b style="color:#000;background:#66ffff">profiling</b> data
     246<pre class="wiki">perf record -p $(pidof program) sleep 120
     247</pre></li><li>report data (using kernel symbols):
     248<pre class="wiki">perf report -k /tmp/vmlinux
     249</pre><ul><li>the -k is optional and adds kernel symbol decoding
     250</li></ul></li></ol><p>
     251References:
     252</p>
     253<ul><li><a class="ext-link" href="https://perf.wiki.kernel.org/index.php/Tutorial"><span class="icon">​</span>https://perf.wiki.kernel.org/index.php/Tutorial</a>
     254</li></ul><h2 id="OpenWrt"><a class="wiki" href="/wiki/OpenWrt">OpenWrt</a></h2>
     255<p>
     256<a class="wiki" href="/wiki/OpenWrt">OpenWrt</a> has support for both oProfile and perf. Because perf depends on glibc (or at least is configured that way) we recommend oprofile when using <a class="wiki" href="/wiki/OpenWrt">OpenWrt</a>.
     257</p>
     258<p>
     259To enable oProfile on <a class="wiki" href="/wiki/OpenWrt">OpenWrt</a> do a make menuconfig and:
     260</p>
     261<ul><li>Global build Settings -&gt; Compile the kernel with <b style="color:#000;background:#66ffff">profiling</b> enabled
     262</li><li>Development -&gt; oprofile
     263</li><li>Development -&gt; oprofile-utils
     264<ul><li>Note that package/devel/oprofile/Makefile may need +librt added to DEPENDS
     265</li></ul></li></ul><p>
     266To enable perf (glibc required):
     267</p>
     268<ul><li>Global build Settings -&gt; Compile the kernel with <b style="color:#000;background:#66ffff">profiling</b> enabled
     269</li><li>Development -&gt; perf
     270</li></ul><p>
     271You likely want to run non-stripped binaries for anything you want to actually investigate. One way of doing this is to build them with CONFIG_DEBUG=y. For example building compat-wireless:
     272</p>
     273<pre class="wiki">make target/<b style="color:#000;background:#ffff66">linux</b>/mac80211/{clean,compile} V=99 CONFIG_DEBUG=y
     274</pre><p>
     275References:
     276</p>
     277<ul><li><a class="ext-link" href="http://false.ekta.is/2012/11/cpu-profiling-applications-on-openwrt-with-perf-or-oprofile/"><span class="icon">​</span><b style="color:#000;background:#66ffff">Profiling</b> on OpenWrt with perf or OProfile</a>
     278</li></ul></div>
     279         
     280          <div class="trac-modifiedby">
     281            <span><a href="/wiki/linux/profiling?action=diff&amp;version=3" title="Version 3 by tharvey: added note about cns3xxx timer based profiling limitations">Last modified</a> <a class="timeline" href="/timeline?from=2015-04-07T16%3A03%3A47-07%3A00&amp;precision=second" title="See timeline at 04/07/15 16:03:47">2 years ago</a></span>
     282            <span class="trac-print">Last modified on 04/07/15 16:03:47</span>
     283          </div>
     284       
     285       
     286      </div>
     287     
     288
     289    </div>
     290    <div id="altlinks">
     291      <h3>Download in other formats:</h3>
     292      <ul>
     293        <li class="last first">
     294          <a rel="nofollow" href="/wiki/linux/profiling?format=txt">Plain Text</a>
     295        </li>
     296      </ul>
     297    </div>
     298    </div>
     299    <div id="footer" lang="en" xml:lang="en"><hr />
     300      <a id="tracpowered" href="http://trac.edgewall.org/"><img src="/chrome/common/trac_logo_mini.png" height="30" width="107" alt="Trac Powered" /></a>
     301      <p class="left">Powered by <a href="/about"><strong>Trac 1.0</strong></a><br />
     302        By <a href="http://www.edgewall.org/">Edgewall Software</a>.</p>
     303      <p class="right">Visit the Trac open source project at<br /><a href="http://trac.edgewall.org/">http://trac.edgewall.org/</a></p>
     304    </div>
     305 
     306</body>
     307
     308}}}