Sophie

Sophie

distrib > Mandriva > current > i586 > media > contrib-backports > by-pkgid > e578866d55cd81fdb23827cdf3cec911 > files > 683

python-scikits-learn-0.6-1mdv2010.2.i586.rpm



<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    
    <title>Train error vs Test error &mdash; scikits.learn v0.6.0 documentation</title>
    <link rel="stylesheet" href="../_static/nature.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '../',
        VERSION:     '0.6.0',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  true
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="author" title="About these documents" href="../about.html" />
    <link rel="top" title="scikits.learn v0.6.0 documentation" href="../index.html" />
    <link rel="up" title="Examples" href="index.html" />
    <link rel="next" title="Faces recognition example using eigenfaces and SVMs" href="applications/plot_face_recognition.html" />
    <link rel="prev" title="Receiver operating characteristic (ROC) with cross validation" href="plot_roc_crossval.html" /> 
  </head>
  <body>
    <div class="header-wrapper">
      <div class="header">
          <p class="logo"><a href="../index.html">
            <img src="../_static/scikit-learn-logo-small.png" alt="Logo"/>
          </a>
          </p><div class="navbar">
          <ul>
            <li><a href="../install.html">Download</a></li>
            <li><a href="../support.html">Support</a></li>
            <li><a href="../user_guide.html">User Guide</a></li>
            <li><a href="index.html">Examples</a></li>
            <li><a href="../developers/index.html">Development</a></li>
       </ul>

<div class="search_form">

<div id="cse" style="width: 100%;"></div>
<script src="http://www.google.com/jsapi" type="text/javascript"></script>
<script type="text/javascript">
  // Bootstrap the Google search widget shown in the navbar.
  // Request the 'search' module, version '1', with English as the UI language.
  google.load('search', '1', {language : 'en'});
  // Register a callback that builds and renders the search control once the
  // loader reports it is ready.
  // NOTE(review): the meaning of the trailing `true` argument is not visible
  // here -- confirm against the Google loader documentation.
  google.setOnLoadCallback(function() {
    // The string argument is presumably the custom search engine ID -- verify.
    var customSearchControl = new google.search.CustomSearchControl('016639176250731907682:tjtqbvtvij0');
    // Select the FILTERED_CSE_RESULTSET result-set size constant.
    customSearchControl.setResultSetSize(google.search.Search.FILTERED_CSE_RESULTSET);
    var options = new google.search.DrawOptions();
    // Turn on autocomplete for the search box.
    options.setAutoComplete(true);
    // Render the control into the element whose id is 'cse'.
    customSearchControl.draw('cse', options);
  }, true);
</script>

</div>

          </div> <!-- end navbar --></div>
    </div>

    <div class="content-wrapper">

    <!-- <div id="blue_tile"></div> -->

        <div class="sphinxsidebar">
        <div class="rel">
          <a href="plot_roc_crossval.html" title="Receiver operating characteristic (ROC) with cross validation"
             accesskey="P">previous</a> |
          <a href="applications/plot_face_recognition.html" title="Faces recognition example using eigenfaces and SVMs"
             accesskey="N">next</a> |
          <a href="../genindex.html" title="General Index"
             accesskey="I">index</a>
        </div>
        

        <h3>Contents</h3>
         <ul>
<li><a class="reference internal" href="#">Train error vs Test error</a></li>
</ul>


        

        </div>

      <div class="content">
            
      <div class="documentwrapper">
        <div class="bodywrapper">
          <div class="body">
            
  <div class="section" id="train-error-vs-test-error">
<span id="example-plot-train-error-vs-test-error-py"></span><h1>Train error vs Test error<a class="headerlink" href="#train-error-vs-test-error" title="Permalink to this headline">¶</a></h1>
<p>Illustration of how the performance of an estimator on unseen data (test data)
is not the same as the performance on training data. As the regularization
increases the performance on train decreases while the performance on test
is optimal within a range of values of the regularization parameter.
The example uses an Elastic-Net regression model, and the performance is
measured using the explained variance, a.k.a. R^2.</p>
<img alt="auto_examples/images/plot_train_error_vs_test_error.png" class="align-center" src="auto_examples/images/plot_train_error_vs_test_error.png" />
<p><strong>Python source code:</strong> <a class="reference download internal" href="../_downloads/plot_train_error_vs_test_error.py"><tt class="xref download docutils literal"><span class="pre">plot_train_error_vs_test_error.py</span></tt></a></p>
<div class="highlight-python"><div class="highlight"><pre><span class="k">print</span> <span class="n">__doc__</span>

<span class="c"># Author: Alexandre Gramfort &lt;alexandre.gramfort@inria.fr&gt;</span>
<span class="c"># License: BSD Style.</span>

<span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">scikits.learn</span> <span class="kn">import</span> <span class="n">linear_model</span>

<span class="c">###############################################################################</span>
<span class="c"># Generate sample data</span>
<span class="n">n_samples_train</span><span class="p">,</span> <span class="n">n_samples_test</span><span class="p">,</span> <span class="n">n_features</span> <span class="o">=</span> <span class="mi">75</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">500</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">coef</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="n">n_features</span><span class="p">)</span>
<span class="n">coef</span><span class="p">[</span><span class="mi">50</span><span class="p">:]</span> <span class="o">=</span> <span class="mf">0.0</span> <span class="c"># only the first 50 features are impacting the model</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="n">n_samples_train</span> <span class="o">+</span> <span class="n">n_samples_test</span><span class="p">,</span> <span class="n">n_features</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">coef</span><span class="p">)</span>

<span class="c"># Split train and test data</span>
<span class="n">X_train</span><span class="p">,</span> <span class="n">X_test</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:</span><span class="n">n_samples_train</span><span class="p">],</span> <span class="n">X</span><span class="p">[</span><span class="n">n_samples_train</span><span class="p">:]</span>
<span class="n">y_train</span><span class="p">,</span> <span class="n">y_test</span> <span class="o">=</span> <span class="n">y</span><span class="p">[:</span><span class="n">n_samples_train</span><span class="p">],</span> <span class="n">y</span><span class="p">[</span><span class="n">n_samples_train</span><span class="p">:]</span>

<span class="c">###############################################################################</span>
<span class="c"># Compute train and test errors</span>
<span class="n">alphas</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">60</span><span class="p">)</span>
<span class="n">enet</span> <span class="o">=</span> <span class="n">linear_model</span><span class="o">.</span><span class="n">ElasticNet</span><span class="p">(</span><span class="n">rho</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
<span class="n">train_errors</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span>
<span class="n">test_errors</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span>
<span class="k">for</span> <span class="n">alpha</span> <span class="ow">in</span> <span class="n">alphas</span><span class="p">:</span>
    <span class="n">enet</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_train</span><span class="p">,</span> <span class="n">y_train</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="n">alpha</span><span class="p">)</span>
    <span class="n">train_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">enet</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_train</span><span class="p">,</span> <span class="n">y_train</span><span class="p">))</span>
    <span class="n">test_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">enet</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_test</span><span class="p">,</span> <span class="n">y_test</span><span class="p">))</span>

<span class="n">i_alpha_optim</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">test_errors</span><span class="p">)</span>
<span class="n">alpha_optim</span> <span class="o">=</span> <span class="n">alphas</span><span class="p">[</span><span class="n">i_alpha_optim</span><span class="p">]</span>
<span class="k">print</span> <span class="s">&quot;Optimal regularization parameter : </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="n">alpha_optim</span>

<span class="c"># Estimate the coef_ on full data with optimal regularization parameter</span>
<span class="n">coef_</span> <span class="o">=</span> <span class="n">enet</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="n">alpha_optim</span><span class="p">)</span><span class="o">.</span><span class="n">coef_</span>

<span class="c">###############################################################################</span>
<span class="c"># Plot results functions</span>

<span class="kn">import</span> <span class="nn">pylab</span> <span class="kn">as</span> <span class="nn">pl</span>
<span class="n">pl</span><span class="o">.</span><span class="n">subplot</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">semilogx</span><span class="p">(</span><span class="n">alphas</span><span class="p">,</span> <span class="n">train_errors</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;Train&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">semilogx</span><span class="p">(</span><span class="n">alphas</span><span class="p">,</span> <span class="n">test_errors</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;Test&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">vlines</span><span class="p">(</span><span class="n">alpha_optim</span><span class="p">,</span> <span class="n">pl</span><span class="o">.</span><span class="n">ylim</span><span class="p">()[</span><span class="mi">0</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">test_errors</span><span class="p">),</span>
                        <span class="n">color</span><span class="o">=</span><span class="s">&#39;k&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;Optimum on test&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s">&#39;lower left&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">ylim</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mf">1.2</span><span class="p">])</span>
<span class="n">pl</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s">&#39;Regularization parameter&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s">&#39;Performance&#39;</span><span class="p">)</span>

<span class="c"># Show estimated coef_ vs true coef</span>
<span class="n">pl</span><span class="o">.</span><span class="n">subplot</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">coef</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;True coef&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">coef_</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;Estimated coef&#39;</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">legend</span><span class="p">()</span>
<span class="n">pl</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="mf">0.09</span><span class="p">,</span> <span class="mf">0.04</span><span class="p">,</span> <span class="mf">0.94</span><span class="p">,</span> <span class="mf">0.94</span><span class="p">,</span> <span class="mf">0.26</span><span class="p">,</span> <span class="mf">0.26</span><span class="p">)</span>
<span class="n">pl</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
</pre></div>
</div>
</div>


          </div>
        </div>
      </div>
        <div class="clearer"></div>
      </div>
    </div>

    <div class="footer">
        <p style="text-align: center">This documentation is relative
        to scikits.learn version 0.6.0</p>
        &copy; 2010, scikits.learn developers (BSD License).
      Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.0.5. Design by <a href="http://webylimonada.com">Web y Limonada</a>.
    </div>
  </body>
</html>