Sophie

Sophie

distrib > Mageia > 5 > x86_64 > media > nonfree-release > by-pkgid > d44b02ea46d82d6a48df31bbd1a088f3 > files > 1707

nvidia-cuda-toolkit-devel-6.5.14-6.mga5.nonfree.x86_64.rpm

<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="NVBLAS"></meta>
      <meta name="abstract" content="The User guide for NVBLAS, drop-in BLAS replacement, multi-GPUs accelerated"></meta>
      <meta name="description" content="The User guide for NVBLAS, drop-in BLAS replacement, multi-GPUs accelerated"></meta>
      <meta name="DC.Coverage" content="CUDA API References"></meta>
      <meta name="DC.subject" content="NVBLAS"></meta>
      <meta name="keywords" content="NVBLAS"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>NVBLAS :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="../common/scripts/tynt/tynt.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="http://docs.nvidia.com/cuda/nvblas/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit
                  v6.5</a></div>
            <div class="category"><a href="index.html" title="NVBLAS">NVBLAS</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#overview">2.&nbsp;Overview</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#routines">3.&nbsp;GPU accelerated routines</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#symbols-interception">4.&nbsp;BLAS symbols interception</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#configuration-file">5.&nbsp;Configuration</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#NVBLAS_CONFIG_FILE">5.1.&nbsp;NVBLAS_CONFIG_FILE environment variable</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#configuration_keywords">5.2.&nbsp;Configuration keywords</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#nvblas_logfile">5.2.1.&nbsp;NVBLAS_LOGFILE</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvblas_cpu_blas_lib">5.2.2.&nbsp;NVBLAS_CPU_BLAS_LIB</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvblas_gpu_list">5.2.3.&nbsp;NVBLAS_GPU_LIST</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvblas_tile_dim">5.2.4.&nbsp;NVBLAS_TILE_DIM</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvblas_gpu_disabled">5.2.5.&nbsp;NVBLAS_GPU_DISABLED_&lt;BLAS_FUNC_NAME&gt;</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvblas_cpu_ratio">5.2.6.&nbsp;NVBLAS_CPU_RATIO_&lt;BLAS_FUNC_NAME&gt;</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvblas_autopin_mem_enabled">5.2.7.&nbsp;NVBLAS_AUTOPIN_MEM_ENABLED</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#configuration_example">5.2.8.&nbsp;Config file Example</a></div>
                           </li>
                        </ul>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#Installation">6.&nbsp;Installation</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#Usage">7.&nbsp;Usage</a></div>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">NVBLAS
                  (<a href="../../pdf/NVBLAS_Library.pdf">PDF</a>)
                  -
                  
                  v6.5
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated August 1, 2014
                  -
                  <a href="mailto:cudatools@nvidia.com?subject=CUDA Toolkit Documentation Feedback: NVBLAS">Send Feedback</a>
                  -
                  <span class="st_facebook"></span><span class="st_twitter"></span><span class="st_linkedin"></span><span class="st_reddit"></span><span class="st_slashdot"></span><span class="st_tumblr"></span><span class="st_sharethis"></span></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#abstract" name="abstract" shape="rect">NVBLAS</a></h2>
                  <div class="body conbody"></div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#introduction" name="introduction" shape="rect">1.&nbsp;Introduction</a></h2>
                  <div class="body conbody">
                     <p class="p">The NVBLAS Library is a GPU-accelerated Library that implements BLAS (Basic Linear Algebra Subprograms). 
                        It can accelerate most BLAS Level-3 routines by dynamically routing BLAS calls to one or more NVIDIA GPUs present in the system,
                        
                        when the characteristics of the call make it likely to speed up on a GPU.
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="overview"><a name="overview" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#overview" name="overview" shape="rect">2.&nbsp;Overview</a></h2>
                  <div class="body conbody">
                     <p class="p">The NVBLAS Library is built on top of the cuBLAS Library using only the CUBLASXT API (See the CUBLASXT API section of the
                        cuBLAS Documentation for more details).
                        NVBLAS also requires the presence of a CPU BLAS library on the system. Currently NVBLAS intercepts only compute intensive
                        BLAS Level-3 calls (see table below).
                        Depending on the characteristics of those BLAS calls, NVBLAS will redirect the calls to the GPUs present in the system or to
                        CPU.
                        That decision is based on a simple heuristic that estimates if the BLAS call will execute for long enough to amortize the
                        PCI transfers of the input and output data to the GPU.  
                        
                        <strong class="ph b">Because NVBLAS does not support all standard BLAS routines, it might be necessary to associate it with an existing full BLAS
                           Library. 
                           Please refer to the <a class="xref" href="index.html#Usage" shape="rect">Usage</a> section for more details.</strong></p>
                  </div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="routines"><a name="routines" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#routines" name="routines" shape="rect">3.&nbsp;GPU accelerated routines</a></h2>
                  <div class="body conbody">
                     <p class="p">
                        NVBLAS offloads only the compute-intensive BLAS3 routines which have the best potential
                        for acceleration on GPUs.
                        
                     </p>
                     <div class="p">The current supported routines are in the table below :
                        
                        
                        
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="13.143098116390217%" id="d54e97" rowspan="1" colspan="1">
                                       Routine
                                       
                                    </th>
                                    <th class="entry" valign="top" width="12.468372223784089%" id="d54e100" rowspan="1" colspan="1">
                                       Types
                                       
                                    </th>
                                    <th class="entry" valign="top" width="74.38852965982569%" id="d54e103" rowspan="1" colspan="1">
                                       Operation
                                       
                                    </th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">gemm</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">S,D,C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">multiplication of 2 matrices.</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">syrk</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">S,D,C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">symmetric rank-k update</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">herk</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">hermitian rank-k update</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">syr2k</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">S,D,C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">symmetric rank-2k update</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">her2k</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">hermitian rank-2k update</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">trsm</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">S,D,C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">triangular solve with multiple right-hand sides</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">trmm</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">S,D,C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">triangular matrix-matrix multiplication</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">symm</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">S,D,C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">symmetric matrix-matrix multiplication</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="13.143098116390217%" headers="d54e97" rowspan="1" colspan="1">
                                       <p class="p">hemm</p>
                                    </td>
                                    <td class="entry" valign="top" width="12.468372223784089%" headers="d54e100" rowspan="1" colspan="1">
                                       <p class="p">C,Z</p>
                                    </td>
                                    <td class="entry" valign="top" width="74.38852965982569%" headers="d54e103" rowspan="1" colspan="1">
                                       <p class="p">hermitian matrix-matrix multiplication</p>
                                    </td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="symbols-interception"><a name="symbols-interception" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#symbols-interception" name="symbols-interception" shape="rect">4.&nbsp;BLAS symbols interception</a></h2>
                  <div class="body conbody">
                     <p class="p"> Standard BLAS Library implementations usually expose multiple symbols for the same routines. Let's say <samp class="ph codeph">func</samp> is a BLAS routine name,
                        <samp class="ph codeph">func_</samp>  and/or <samp class="ph codeph">func</samp> are usually defined as extern symbols. Some BLAS Libraries might also expose some symbols with a proprietary appended prefix.
                        NVBLAS intercepts only the symbols <samp class="ph codeph">func_</samp> and <samp class="ph codeph">func</samp>. The user needs to make sure that the application intended to be GPU-accelerated by NVBLAS
                        actually calls those defined symbols. Any other symbols will not be intercepted and the original BLAS routine will be executed
                        for those cases. 
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="configuration-file"><a name="configuration-file" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#configuration-file" name="configuration-file" shape="rect">5.&nbsp;Configuration</a></h2>
                  <div class="body conbody">
                     <p class="p">Because NVBLAS is a drop-in replacement of BLAS, it must be configured through an ASCII text file that describes how 
                        many and which GPUs can participate in the intercepted BLAS calls. 
                        The configuration file is parsed at the time of the loading of the library. 
                        The format of the configuration file is based on keywords optionally followed by one or more user-defined parameters. 
                        At most one keyword per line is allowed. Blank lines or lines started by the character <samp class="ph codeph">#</samp> are ignored.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" xml:lang="en-us" id="NVBLAS_CONFIG_FILE"><a name="NVBLAS_CONFIG_FILE" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#NVBLAS_CONFIG_FILE" name="NVBLAS_CONFIG_FILE" shape="rect">5.1.&nbsp;NVBLAS_CONFIG_FILE environment variable</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The location and name of the configuration file must be defined by the environment variable <samp class="ph codeph">NVBLAS_CONFIG_FILE</samp>. 
                           By default, if <samp class="ph codeph">NVBLAS_CONFIG_FILE</samp> is not defined, NVBLAS will try to open the file <samp class="ph codeph">nvblas.conf</samp> in the current directory.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" xml:lang="en-us" id="configuration_keywords"><a name="configuration_keywords" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#configuration_keywords" name="configuration_keywords" shape="rect">5.2.&nbsp;Configuration keywords</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The configuration keywords syntax is described in the following sub-sections.
                           
                        </p>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_logfile"><a name="nvblas_logfile" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_logfile" name="nvblas_logfile" shape="rect">5.2.1.&nbsp;NVBLAS_LOGFILE</a></h3>
                        <div class="body conbody">
                           <p class="p">This keyword defines the file where NVBLAS should print status and error messages. By default, if not defined, the standard
                              error output file (i.e. stderr) will be used.
                              It is advised to define this keyword early in the configuration to capture errors in parsing that file itself.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_cpu_blas_lib"><a name="nvblas_cpu_blas_lib" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_cpu_blas_lib" name="nvblas_cpu_blas_lib" shape="rect">5.2.2.&nbsp;NVBLAS_CPU_BLAS_LIB</a></h3>
                        <div class="body conbody">
                           <p class="p">This keyword defines the CPU BLAS dynamic library file (e.g. a .so file on Linux or a .dll file on Windows) that NVBLAS should open
                              to find the CPU BLAS symbols definitions.
                              This keyword must be defined for NVBLAS to work. Because CPU BLAS libraries are often composed of multiple files, even though
                              this keyword is set to the 
                              full path to the main file of the CPU library, it might still be necessary to define the right path to find the rest of the
                              library files in the environment of your system. 
                              On Linux, this can be done by setting the environment variable LD_LIBRARY_PATH whereas on Windows, this can be done by setting
                              the environment variable PATH.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_gpu_list"><a name="nvblas_gpu_list" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_gpu_list" name="nvblas_gpu_list" shape="rect">5.2.3.&nbsp;NVBLAS_GPU_LIST</a></h3>
                        <div class="body conbody">
                           <div class="p">This keyword defines the list of GPUs that should participate in the computation of the intercepted BLAS calls.
                              If not defined, only GPU device 0 is used, since that is normally the most compute-capable GPU installed in the system. 
                              This keyword can be set to a list of device numbers separated by blank characters.
                              The following wildcard keywords are also accepted for simplicity :
                              
                              
                              <div class="tablenoborder">
                                 <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                                    <thead class="thead" align="left">
                                       <tr class="row">
                                          <th class="entry" valign="top" width="15.015256142604786%" id="d54e427" rowspan="1" colspan="1">
                                             Keyword
                                             
                                          </th>
                                          <th class="entry" valign="top" width="84.98474385739522%" id="d54e430" rowspan="1" colspan="1">
                                             Meaning
                                             
                                          </th>
                                       </tr>
                                    </thead>
                                    <tbody class="tbody">
                                       <tr class="row">
                                          <td class="entry" valign="top" width="15.015256142604786%" headers="d54e427" rowspan="1" colspan="1">
                                             <p class="p">ALL</p>
                                          </td>
                                          <td class="entry" valign="top" width="84.98474385739522%" headers="d54e430" rowspan="1" colspan="1">
                                             <p class="p">All compute-capable GPUs detected on the system will be used by NVBLAS</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="15.015256142604786%" headers="d54e427" rowspan="1" colspan="1">
                                             <p class="p">ALL0</p>
                                          </td>
                                          <td class="entry" valign="top" width="84.98474385739522%" headers="d54e430" rowspan="1" colspan="1">
                                             <p class="p">GPU device 0, AND all other GPUs detected that have the same compute-capabilities as device 0 will be used by NVBLAS</p>
                                          </td>
                                       </tr>
                                    </tbody>
                                 </table>
                              </div>
                           </div>
                           <p class="p"><strong class="ph b">Note :  In the current release of CUBLAS, the CUBLASXT API supports two GPUs if they are on the same board such as Tesla K10
                                 or GeForce GTX690 and one GPU otherwise.
                                 Because NVBLAS is built on top of the CUBLASXT API, NVBLAS has the same restriction. 
                                 If access to more GPU devices is needed, details of the licensing are described at <a class="xref" href="https://developer.nvidia.com/cublasxt" target="_blank" shape="rect">cublasXt</a>. 
                                 </strong></p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_tile_dim"><a name="nvblas_tile_dim" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_tile_dim" name="nvblas_tile_dim" shape="rect">5.2.4.&nbsp;NVBLAS_TILE_DIM</a></h3>
                        <div class="body conbody">
                           <p class="p">This keyword defines the tile dimension that should be used to divide the matrices involved in the computation. 
                              This definition maps directly to a call of the cublasXt API routine <samp class="ph codeph">cublasXtSetBlockDim</samp>.
                              Refer to cuBLAS documentation to understand the tradeoffs associated with setting this to a larger or a smaller value.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_gpu_disabled"><a name="nvblas_gpu_disabled" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_gpu_disabled" name="nvblas_gpu_disabled" shape="rect">5.2.5.&nbsp;NVBLAS_GPU_DISABLED_&lt;BLAS_FUNC_NAME&gt;</a></h3>
                        <div class="body conbody">
                           <p class="p">This keyword, appended with the name of a BLAS routine, disables NVBLAS from running a specified routine on the GPU. This feature
                              is intended mainly for debugging purposes.
                              By default, all supported BLAS routines are enabled.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_cpu_ratio"><a name="nvblas_cpu_ratio" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_cpu_ratio" name="nvblas_cpu_ratio" shape="rect">5.2.6.&nbsp;NVBLAS_CPU_RATIO_&lt;BLAS_FUNC_NAME&gt;</a></h3>
                        <div class="body conbody">
                           <p class="p">This keyword, appended with the name of a BLAS routine, defines the ratio of the 
                              workload that should remain on the CPU in the event that the NVBLAS decides to offload work for that routine on the GPU. 
                              This functionality is directly mapped to the cublasXt API routine <samp class="ph codeph">cublasXtSetCpuRatio</samp>.
                              By default, the ratio is defined to zero for all routines.
                              Please refer to the cuBLAS Documentation for details and for the list of routines which support this feature.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="nvblas_autopin_mem_enabled"><a name="nvblas_autopin_mem_enabled" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvblas_autopin_mem_enabled" name="nvblas_autopin_mem_enabled" shape="rect">5.2.7.&nbsp;NVBLAS_AUTOPIN_MEM_ENABLED</a></h3>
                        <div class="body conbody">
                           <p class="p">This keyword enables the Pinning Memory mode. 
                              This functionality is directly mapped to the cublasXt API routine <samp class="ph codeph">cublasXtSetPinningMemMode</samp>. 
                              If this keyword is not present in the configuration file, the Pinning Memory mode will be set to CUBLASXT_PINNING_DISABLED.
                              Please refer to the cuBLAS Documentation for details.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" xml:lang="en-us" id="configuration_example"><a name="configuration_example" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#configuration_example" name="configuration_example" shape="rect">5.2.8.&nbsp;Config file Example</a></h3>
                        <div class="body conbody">
                           <p class="p">The example below shows a typical NVBLAS configuration file :</p><pre xml:space="preserve">#Copyright 2013 NVIDIA Corporation.  All rights reserved.
# This is the configuration file to use NVBLAS Library
# Setup the environment variable NVBLAS_CONFIG_FILE to specify your own config file.
# By default, if NVBLAS_CONFIG_FILE is not defined, 
# NVBLAS Library will try to open the file "nvblas.conf" in its current directory
# Example : NVBLAS_CONFIG_FILE  /home/cuda_user/my_nvblas.conf

# Specify which output log file (default is stderr)
NVBLAS_LOGFILE  nvblas.log

#Put here the CPU BLAS fallback Library of your choice
NVBLAS_CPU_BLAS_LIB  libopenblas.so
#NVBLAS_CPU_BLAS_LIB  libmkl_rt.so

# List of GPU device IDs to participate in the computation 
# Use ALL if you want all your GPUs to contribute
# Use ALL0, if you want all your GPUs of the same type as device 0 to contribute
# However, NVBLAS considers that all GPUs have the same performance and PCI bandwidth
# By default, if no GPUs are listed, only device 0 will be used

#NVBLAS_GPU_LIST 0 2 4
#NVBLAS_GPU_LIST ALL
NVBLAS_GPU_LIST ALL0

# Tile Dimension
NVBLAS_TILE_DIM 2048

# Autopin Memory
NVBLAS_AUTOPIN_MEM_ENABLED

#List of BLAS routines that are prevented from running on GPU (use for debugging purposes)
# The current list of BLAS routines supported by NVBLAS are
# GEMM, SYRK, HERK, TRSM, TRMM, SYMM, HEMM, SYR2K, HER2K

#NVBLAS_GPU_DISABLED_SGEMM 
#NVBLAS_GPU_DISABLED_DGEMM 
#NVBLAS_GPU_DISABLED_CGEMM 
#NVBLAS_GPU_DISABLED_ZGEMM 

# Computation can be optionally hybridized between CPU and GPU
# By default, GPU-supported BLAS routines are run fully on GPU
# The option NVBLAS_CPU_RATIO_&lt;BLAS_ROUTINE&gt; gives the ratio [0,1] 
# of the amount of computation that should be done on CPU
# CAUTION : this option should be used wisely because it can actually
# significantly reduce the overall performance if too much work is given to CPU

#NVBLAS_CPU_RATIO_CGEMM 0.07
</pre></div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="Installation"><a name="Installation" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#Installation" name="Installation" shape="rect">Installation</a></h2>
                  <div class="body conbody">
                     <p class="p">The NVBLAS Library is part of the CUDA Toolkit, and will be installed along with all the other CUDA libraries. 
                        It is available on 64-bit operating systems.
                        NVBLAS Library is built on top of cuBLAS, so the cuBLAS library needs to be accessible by NVBLAS.
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" xml:lang="en-us" id="Usage"><a name="Usage" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#Usage" name="Usage" shape="rect">Usage</a></h2>
                  <div class="body conbody">
                     <p class="p">To use the NVBLAS Library, the user application must be relinked against NVBLAS in addition to the original CPU BLAS 
                        (technically only NVBLAS is needed unless some BLAS routines not supported by NVBLAS are used by the application). 
                        To be sure that the linker links against the exposed symbols of NVBLAS and not the ones from the CPU BLAS, 
                        the NVBLAS Library needs to be put before the CPU BLAS on the linkage command line.
                        
                     </p>
                     <p class="p">
                        On Linux, an alternative way to use NVBLAS Library is to use the LD_PRELOAD environment variable; this technique has the advantage
                        of avoiding the relinkage step. 
                        However, the user should avoid defining that environment variable globally because it will cause the NVBLAS library to be
                        loaded by every shell command executed on the system, 
                        thus leading to a lack of responsiveness of the system.
                        
                     </p>
                     <p class="p"> Finally, mathematical tools and libraries often offer the opportunity to specify the BLAS Library to be used through an environment
                        variable or a configuration file. 
                        Because NVBLAS does not support all the standard BLAS routines, it might be necessary to pair NVBLAS with a full BLAS library,
                        even if your application only calls supported NVBLAS routines.
                        Fortunately, those tools and libraries usually offer a way to specify multiple BLAS Libraries. Please refer to the documentation
                        of the appropriate tools and libraries for details.
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright"><a name="copyright" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright" name="copyright" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© 2007-<span class="ph">2014</span> NVIDIA Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script></body>
</html>