Sophie

Sophie

distrib > Mageia > 5 > x86_64 > media > nonfree-updates > by-pkgid > fd8445e7e4d58b8cfe6e0150bd441ee1 > files > 1139

nvidia-cuda-toolkit-devel-6.5.14-6.1.mga5.nonfree.x86_64.rpm

<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="CUDA-MEMCHECK"></meta>
      <meta name="abstract" content="The user manual for CUDA-MEMCHECK."></meta>
      <meta name="description" content="The user manual for CUDA-MEMCHECK."></meta>
      <meta name="DC.Coverage" content="Tools"></meta>
      <meta name="DC.subject" content="CUDA MEMCHECK, CUDA MEMCHECK features, CUDA MEMCHECK tools, CUDA MEMCHECK supported OS, CUDA MEMCHECK supported devices, CUDA MEMCHECK error, CUDA MEMCHECK racecheck, CUDA MEMCHECK backtrace, CUDA MEMCHECK hardware exception, CUDA MEMCHECK memory access"></meta>
      <meta name="keywords" content="CUDA MEMCHECK, CUDA MEMCHECK features, CUDA MEMCHECK tools, CUDA MEMCHECK supported OS, CUDA MEMCHECK supported devices, CUDA MEMCHECK error, CUDA MEMCHECK racecheck, CUDA MEMCHECK backtrace, CUDA MEMCHECK hardware exception, CUDA MEMCHECK memory access"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>CUDA-MEMCHECK :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="../common/scripts/tynt/tynt.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="http://docs.nvidia.com/cuda/cuda-memcheck/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit
                  v6.5</a></div>
            <div class="category"><a href="index.html" title="CUDA-MEMCHECK">CUDA-MEMCHECK</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#about-cuda-memcheck">1.1.&nbsp;About CUDA-MEMCHECK</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#why-cuda-memcheck">1.2.&nbsp;Why CUDA-MEMCHECK?</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#how-to-get-cuda-memcheck">1.3.&nbsp;How to Get CUDA-MEMCHECK</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#cuda-memcheck-tools">1.4.&nbsp;CUDA-MEMCHECK tools</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#using-cuda-memcheck">2.&nbsp;Using CUDA-MEMCHECK</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#command-line-options">2.1.&nbsp;Command Line Options</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-operating-systems">2.2.&nbsp;Supported Operating Systems</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-devices">2.3.&nbsp;Supported Devices</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#compilation-options">2.4.&nbsp;Compilation Options</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#memcheck-tool">3.&nbsp;Memcheck Tool</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#what-is-memcheck">3.1.&nbsp;What is Memcheck ?</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-error-detection">3.2.&nbsp;Supported Error Detection</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#using-memcheck">3.3.&nbsp;Using Memcheck</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#understanding-memcheck-errors">3.4.&nbsp;Understanding Memcheck Errors</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#integrated-mode">3.5.&nbsp;Integrated Mode</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#api-error-checking">3.6.&nbsp;CUDA API Error Checking</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#device-side-allocation-checking">3.7.&nbsp;Device Side Allocation Checking</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#leak-checking">3.8.&nbsp;Leak Checking</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#racecheck-tool">4.&nbsp;Racecheck Tool</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#what-is-racecheck">4.1.&nbsp;What is Racecheck ?</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#what-are-hazards">4.2.&nbsp;What are Hazards?</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#using-racecheck">4.3.&nbsp;Using Racecheck</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#racecheck-report-modes">4.4.&nbsp;Racecheck report modes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#understanding-racecheck-analysis-reports">4.5.&nbsp;Understanding Racecheck Analysis Reports</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#understanding-racecheck-hazard-reports">4.6.&nbsp;Understanding Racecheck Hazard Reports</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#cuda-memcheck-features">5.&nbsp;CUDA-MEMCHECK Features</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#nonblocking-mode">5.1.&nbsp;Nonblocking Mode</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#stack-backtraces">5.2.&nbsp;Stack Backtraces</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#name-demangling">5.3.&nbsp;Name Demangling</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#dynamic-parallelism">5.4.&nbsp;Dynamic Parallelism</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#error-actions">5.5.&nbsp;Error Actions</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#escape-sequences">5.6.&nbsp;Escape Sequences</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#specifying-filters">5.7.&nbsp;Specifying Filters</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#os-specific-behavior">6.&nbsp;Operating System Specific Behavior</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#os-specific-windows">6.1.&nbsp;Windows Specific Behavior</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#os-specific-android">6.2.&nbsp;Android Specific Behavior</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#cuda-fortran-support">7.&nbsp;CUDA Fortran Support</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#unique_1124504307">7.1.&nbsp;CUDA Fortran Specific Behavior</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#cuda-memcheck-tool-examples">8.&nbsp;CUDA-MEMCHECK Tool Examples</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#example-use-of-memcheck">8.1.&nbsp;Example Use of Memcheck</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#memcheck-demo-output">8.1.1.&nbsp;memcheck_demo Output </a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#memcheck-demo-output-with-memcheck-release-build">8.1.2.&nbsp;memcheck_demo Output with Memcheck (Release Build)</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#memcheck-demo-output-with-memcheck-debug-build">8.1.3.&nbsp;memcheck_demo Output with Memcheck (Debug Build)</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#leak-checking-in-cuda-memcheck">8.1.4.&nbsp;Leak Checking in CUDA-MEMCHECK</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#integrated-cuda-memcheck-example">8.2.&nbsp;Integrated CUDA-MEMCHECK Example</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#memory-access-error-reporting">A.&nbsp;Memory Access Error Reporting</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#hardware-exception-reporting">B.&nbsp;Hardware Exception Reporting</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#release-notes">C.&nbsp;Release Notes</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#new-features-6.5">C.1.&nbsp;New Features in 6.5</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#new-features-6.0">C.2.&nbsp;New Features in 6.0</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#new-features-5.5">C.3.&nbsp;New Features in 5.5</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#new-features-5.0">C.4.&nbsp;New Features in 5.0</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#known-issues">D.&nbsp;Known Issues</a></div>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">CUDA-MEMCHECK
                  (<a href="../../pdf/CUDA_Memcheck.pdf">PDF</a>)
                  -
                  
                  v6.5
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated August 1, 2014
                  -
                  <a href="mailto:cudatools@nvidia.com?subject=CUDA%20Toolkit%20Documentation%20Feedback:%20CUDA-MEMCHECK">Send Feedback</a>
                  -
                  <span class="st_facebook"></span><span class="st_twitter"></span><span class="st_linkedin"></span><span class="st_reddit"></span><span class="st_slashdot"></span><span class="st_tumblr"></span><span class="st_sharethis"></span></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#abstract" name="abstract" shape="rect">CUDA-MEMCHECK</a></h2>
                  <div class="body conbody"></div>
               </div>
               <div class="topic concept nested0" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#introduction" name="introduction" shape="rect">1.&nbsp;Introduction</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="about-cuda-memcheck"><a name="about-cuda-memcheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#about-cuda-memcheck" name="about-cuda-memcheck" shape="rect">1.1.&nbsp;About CUDA-MEMCHECK</a></h3>
                     <div class="body conbody">
                        <p class="p">CUDA-MEMCHECK is a functional correctness checking suite included in the
                           CUDA toolkit. This suite contains multiple tools that can perform different
                           types of checks. The <dfn class="term">memcheck</dfn> tool is capable of precisely detecting and
                           attributing out of bounds and misaligned memory access errors in CUDA applications.
                           The tool also reports hardware exceptions encountered by the GPU.
                           The <dfn class="term">racecheck</dfn> tool can report shared memory data access hazards
                           that can cause data races. This document describes the usage of these tools.
                           
                        </p>
                        <p class="p">CUDA-MEMCHECK can be run in <dfn class="term">standalone mode</dfn> where the user's
                           application is started under CUDA-MEMCHECK. The <dfn class="term">memcheck</dfn> tool can also
                           be enabled in <dfn class="term">integrated mode</dfn> inside CUDA-GDB.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="why-cuda-memcheck"><a name="why-cuda-memcheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#why-cuda-memcheck" name="why-cuda-memcheck" shape="rect">1.2.&nbsp;Why CUDA-MEMCHECK?</a></h3>
                     <div class="body conbody">
                        <p class="p">NVIDIA allows developers to easily harness the power of GPUs to solve problems in
                           parallel using CUDA. CUDA applications often run thousands of threads in parallel.
                           Every programmer invariably encounters memory access errors and thread ordering
                           errors that are hard to detect and time consuming to debug. The number of
                           such errors increases substantially when dealing with thousands of threads.
                           The CUDA-MEMCHECK suite is designed to detect such errors in your CUDA application.
                           Using the memcheck tool, CUDA-MEMCHECK can identify memory access errors as well
                           as hardware reported program errors. The racecheck tool in CUDA-MEMCHECK
                           can identify hazards caused by race conditions in the CUDA program.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="how-to-get-cuda-memcheck"><a name="how-to-get-cuda-memcheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#how-to-get-cuda-memcheck" name="how-to-get-cuda-memcheck" shape="rect">1.3.&nbsp;How to Get CUDA-MEMCHECK</a></h3>
                     <div class="body conbody">
                        <p class="p">CUDA-MEMCHECK is installed as part of the CUDA toolkit.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="cuda-memcheck-tools"><a name="cuda-memcheck-tools" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#cuda-memcheck-tools" name="cuda-memcheck-tools" shape="rect">1.4.&nbsp;CUDA-MEMCHECK tools</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           Tools allow use of the basic CUDA-MEMCHECK infrastructure to provide different
                           checking mechanisms. Currently, the supported tools are:
                           
                           <ul class="ul">
                              <li class="li"><dfn class="term">Memcheck</dfn> - The memory access error and leak detection tool.
                                 See <a class="xref" href="index.html#memcheck-tool" shape="rect">Memcheck Tool</a>.</li>
                              <li class="li"><dfn class="term">Racecheck</dfn> - The shared memory data access hazard detection tool.
                                 See <a class="xref" href="index.html#racecheck-tool" shape="rect">Racecheck Tool</a>.</li>
                           </ul>
                        </div>
                        <div class="tablenoborder"><a name="cuda-memcheck-tools__supported-modes-by-tool" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="cuda-memcheck-tools__supported-modes-by-tool" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 1. Supported Modes by CUDA-MEMCHECK tool</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e177" rowspan="1" colspan="1">Tool Name</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e180" rowspan="1" colspan="1">Standalone Mode</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e183" rowspan="1" colspan="1">Integrated Mode</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e177" rowspan="1" colspan="1">Memcheck</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e180" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e183" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e177" rowspan="1" colspan="1">Racecheck</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e180" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e183" rowspan="1" colspan="1">No</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="using-cuda-memcheck"><a name="using-cuda-memcheck" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#using-cuda-memcheck" name="using-cuda-memcheck" shape="rect">2.&nbsp;Using CUDA-MEMCHECK</a></h2>
                  <div class="body conbody">
                     <div class="p">CUDA-MEMCHECK tools can be invoked by running the <samp class="ph codeph">cuda-memcheck</samp>
                        executable as follows:
                        <pre class="pre screen" xml:space="preserve">
<strong class="ph b">cuda-memcheck [options] app_name [app_options] </strong>
</pre></div>
                     <p class="p">
                        For a full list of options that can be specified to memcheck and their default values, see <a class="xref" href="index.html#command-line-options" shape="rect">Command Line Options</a>.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="command-line-options"><a name="command-line-options" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options" name="command-line-options" shape="rect">2.1.&nbsp;Command Line Options</a></h3>
                     <div class="body conbody">
                        <p class="p"> Command line options can be specified to <samp class="ph codeph">cuda-memcheck</samp>.
                           With some exceptions, the options to memcheck are usually of
                           the form <samp class="ph codeph">--option value</samp>. The option list can be terminated
                           by specifying <samp class="ph codeph">--</samp>. All subsequent words on the command line are
                           treated as the application being run and its arguments.
                           
                        </p>
                        <p class="p"> The table below describes the supported
                           options in detail. The first column is the option name as passed to CUDA-MEMCHECK.
                           Some options have a one character short form, which is given in parentheses.
                           These options can be invoked using a single hyphen. For example, the help option
                           can be invoked as <samp class="ph codeph">-h</samp>. The options that have a short form do not
                           take a value.
                           
                        </p>
                        <p class="p"> The second column contains the permissible values for the option. In case the
                           value is user defined, this is shown below in braces {}. An option
                           that can accept any numerical value is represented as <dfn class="term"> {number} </dfn>.
                           Blank entries indicate that the value is not present.
                           
                        </p>
                        <p class="p"> The third column contains the default value of the option. Some options have
                           different default values depending on the architecture they are being run on.
                           
                        </p>
                        <div class="tablenoborder"><a name="command-line-options__cuda-memcheck-command-line-options" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options__cuda-memcheck-command-line-options" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 2. CUDA-MEMCHECK Command line options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="20%" id="d54e299" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="20%" id="d54e302" rowspan="1" colspan="1">Values</th>
                                    <th class="entry" valign="top" width="20%" id="d54e305" rowspan="1" colspan="1">Default</th>
                                    <th class="entry" valign="top" width="40%" id="d54e308" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">binary-patching</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">yes, no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">yes</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Controls whether CUDA-MEMCHECK should modify the application binary at runtime. This option is enabled by default. Setting
                                       this to "no" will reduce the precision of errors reported by the tool. Normal users will not need to modify this flag.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">demangle</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">full, simple, no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">full</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Enables demangling of device function names. For more information, see <a class="xref" href="index.html#name-demangling" shape="rect">Name Demangling</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">destroy-on-device-error</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">context,kernel</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">context</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">This controls how the application proceeds on hitting a memory access error. For more information, see <a class="xref" href="index.html#error-actions" shape="rect">Error Actions</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">error-exitcode</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">{number}</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">0</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">The exit code CUDA-MEMCHECK will return if the original application succeeded but memcheck detected that errors were present. This
                                       is meant to allow CUDA-MEMCHECK to be integrated into automated test suites.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">filter</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">{key1=val1}[{,key2=val2}]</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Controls which application kernels will be checked by the running CUDA-MEMCHECK tool. For more information, see <a class="xref" href="index.html#specifying-filters" shape="rect">Specifying Filters</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">flush-to-disk</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">yes,no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">no</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Forces every disk write to be flushed to disk. When enabled, this will make CUDA-MEMCHECK tools much slower. </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">force-blocking-launches</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">yes,no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">no on SM 2.0+, yes on SM 1.x</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">This forces all host kernel launches to be sequential. When enabled, the number and precision of memcheck reported errors
                                       will decrease. This option only has an effect on SM 2.0 and higher GPUs.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">help (h)</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Displays the help message</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">language</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">c,fortran</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">c</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">This controls application source language specific behavior in CUDA-MEMCHECK tools. For Fortran specific behavior, see <a class="xref" href="index.html#unique_1124504307" shape="rect">CUDA Fortran Specific Behavior</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">log-file</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">{filename}</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">This is the file CUDA-MEMCHECK will write all of its text output to. By default, CUDA-MEMCHECK will print all output to stdout.
                                       For more information, see <a class="xref" href="index.html#escape-sequences" shape="rect">Escape Sequences</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">prefix</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">{string}</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">========</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">The string prepended to CUDA-MEMCHECK output lines</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">print-level</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">info,warn,error,fatal</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">info</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">The minimum print level of messages from CUDA-MEMCHECK.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">read</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">{filename}</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">The input CUDA-MEMCHECK file to read data from. This can be used in conjunction with the --save option to allow processing
                                       records after a run.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">save</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">{filename}</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Filename where CUDA-MEMCHECK will save the output from the current run. For more information, see <a class="xref" href="index.html#escape-sequences" shape="rect">Escape Sequences</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">show-backtrace</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">yes,host,device,no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">yes</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1"> Displays a backtrace for most types of errors. No disables all backtraces; Yes enables all backtraces. Host enables only
                                       host side backtraces. Device enables only device side backtraces. For more information, see <a class="xref" href="index.html#stack-backtraces" shape="rect">Stack Backtraces</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">tool</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">memcheck, racecheck</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">memcheck</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Controls which CUDA-MEMCHECK tool is actively running</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e299" rowspan="1" colspan="1">version (V)</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e302" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e305" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e308" rowspan="1" colspan="1">Prints the version of cuda-memcheck</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <div class="tablenoborder"><a name="command-line-options__memcheck-tool-command-line-options" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options__memcheck-tool-command-line-options" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 3. <dfn class="term">Memcheck</dfn> Tool Command line options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="20%" id="d54e619" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="20%" id="d54e622" rowspan="1" colspan="1">Values</th>
                                    <th class="entry" valign="top" width="20%" id="d54e625" rowspan="1" colspan="1">Default</th>
                                    <th class="entry" valign="top" width="40%" id="d54e628" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e619" rowspan="1" colspan="1">check-api-memory-access</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e622" rowspan="1" colspan="1">yes,no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e625" rowspan="1" colspan="1">yes</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e628" rowspan="1" colspan="1">Enable checking of cudaMemcpy/cudaMemset</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e619" rowspan="1" colspan="1">check-device-heap</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e622" rowspan="1" colspan="1">yes,no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e625" rowspan="1" colspan="1">yes on SM 2.0+, no on SM 1.x</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e628" rowspan="1" colspan="1">Enable checking of device heap allocations. This applies to both error checking and leak checking. This option only has effect
                                       on SM 2.0 and higher GPUs.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e619" rowspan="1" colspan="1">leak-check</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e622" rowspan="1" colspan="1">full,no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e625" rowspan="1" colspan="1">no</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e628" rowspan="1" colspan="1">Prints information about all allocations that have not been freed via cudaFree at the point when the context was destroyed.
                                       For more information, see <a class="xref" href="index.html#leak-checking" shape="rect">Leak Checking</a>.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e619" rowspan="1" colspan="1">report-api-errors</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e622" rowspan="1" colspan="1">all, explicit, no</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e625" rowspan="1" colspan="1">explicit</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e628" rowspan="1" colspan="1">Report errors if any CUDA API call fails. For more information, see <a class="xref" href="index.html#api-error-checking" shape="rect">CUDA API Error Checking</a>.
                                    </td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <div class="tablenoborder"><a name="command-line-options__racecheck-tool-command-line-options" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options__racecheck-tool-command-line-options" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 4. <dfn class="term">Racecheck</dfn> Tool Command line options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="20%" id="d54e726" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="20%" id="d54e729" rowspan="1" colspan="1">Values</th>
                                    <th class="entry" valign="top" width="20%" id="d54e732" rowspan="1" colspan="1">Default</th>
                                    <th class="entry" valign="top" width="40%" id="d54e735" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e726" rowspan="1" colspan="1">racecheck-report</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e729" rowspan="1" colspan="1">hazard,analysis,all</td>
                                    <td class="entry" valign="top" width="20%" headers="d54e732" rowspan="1" colspan="1">analysis</td>
                                    <td class="entry" valign="top" width="40%" headers="d54e735" rowspan="1" colspan="1">Controls how racecheck reports information. For more information, see <a class="xref" href="index.html#racecheck-report-modes" shape="rect">Racecheck report modes</a>.
                                    </td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-operating-systems"><a name="supported-operating-systems" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-operating-systems" name="supported-operating-systems" shape="rect">2.2.&nbsp;Supported Operating Systems</a></h3>
                     <div class="body conbody">
                        <p class="p">The standalone CUDA-MEMCHECK binary is supported on all CUDA supported platforms,
                           i.e. Windows, Mac OS X, supported Linux distributions, and Android.
                           CUDA-MEMCHECK can interoperate with CUDA-GDB on Android, Linux and Mac OS X.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-devices"><a name="supported-devices" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-devices" name="supported-devices" shape="rect">2.3.&nbsp;Supported Devices</a></h3>
                     <div class="body conbody">
                        <p class="p">The CUDA-MEMCHECK tool suite is supported on all CUDA capable GPUs with SM versions
                           1.1 and above. Individual tools may support a different subset of GPUs.
                           The table below contains the list of current tools and their supported GPUs.
                        </p>
                        <div class="tablenoborder"><a name="supported-devices__supported-devices-by-tool" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="supported-devices__supported-devices-by-tool" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 5. Supported Devices by CUDA-MEMCHECK tool</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="28.57142857142857%" id="d54e812" rowspan="1" colspan="1">Tool Name</th>
                                    <th class="entry" valign="top" width="14.285714285714285%" id="d54e815" rowspan="1" colspan="1">SM 1.x</th>
                                    <th class="entry" valign="top" width="14.285714285714285%" id="d54e818" rowspan="1" colspan="1">SM 2.x</th>
                                    <th class="entry" valign="top" width="14.285714285714285%" id="d54e821" rowspan="1" colspan="1">SM 3.0</th>
                                    <th class="entry" valign="top" width="14.285714285714285%" id="d54e824" rowspan="1" colspan="1">SM 3.5</th>
                                    <th class="entry" valign="top" width="14.285714285714285%" id="d54e828" rowspan="1" colspan="1">SM 5.0</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="28.57142857142857%" headers="d54e812" rowspan="1" colspan="1">Memcheck</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e815" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e818" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e821" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e824" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e828" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="28.57142857142857%" headers="d54e812" rowspan="1" colspan="1">Racecheck</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e815" rowspan="1" colspan="1">No</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e818" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e821" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e824" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="14.285714285714285%" headers="d54e828" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="compilation-options"><a name="compilation-options" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#compilation-options" name="compilation-options" shape="rect">2.4.&nbsp;Compilation Options</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The CUDA-MEMCHECK tools <dfn class="term">memcheck</dfn> and <dfn class="term">racecheck</dfn> do not
                           need any special compilation flags to function.
                           
                        </p>
                        <p class="p">
                           The output displayed by the CUDA-MEMCHECK tools is more useful with some extra
                           compiler flags. The <samp class="ph codeph">-G</samp> option to nvcc forces the compiler to
                           generate debug information for the CUDA application. To generate line number
                           information for applications without affecting the optimization level of
                           the output, the <samp class="ph codeph">-lineinfo</samp> option to nvcc can be used.
                           The CUDA-MEMCHECK tools fully support both of these options
                           and can display source attribution of errors for applications
                           compiled with line information.
                           
                        </p>
                        <p class="p"> The stack backtrace feature of the CUDA-MEMCHECK tools is more useful
                           when the application contains function symbol names. For the host backtrace,
                           this varies based on the host OS. On Linux, the host compiler
                           must be given the <samp class="ph codeph">-rdynamic</samp> option to retain function
                           symbols. On Windows, the application must be compiled for debugging,
                           i.e. the <samp class="ph codeph">/Zi</samp> option. When using nvcc, flags to the host
                           compiler can be specified using the <samp class="ph codeph">-Xcompiler</samp> option.
                           For the device backtrace, the full
                           frame information is only available when the application is compiled
                           with device debug information. The compiler can skip generation of
                           frame information when building with optimizations.
                           
                        </p>
                        <div class="p">
                           Sample command line to build with function symbols and device side line
                           information on Linux:
                           <pre class="pre screen" xml:space="preserve">
nvcc -Xcompiler -rdynamic -lineinfo  -o out in.cu
</pre></div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="memcheck-tool"><a name="memcheck-tool" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#memcheck-tool" name="memcheck-tool" shape="rect">3.&nbsp;Memcheck Tool</a></h2>
                  <div class="body conbody">
                     <p class="p"></p>
                  </div>
                  <div class="topic concept nested1" id="what-is-memcheck"><a name="what-is-memcheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#what-is-memcheck" name="what-is-memcheck" shape="rect">3.1.&nbsp;What is Memcheck?</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The <dfn class="term">memcheck</dfn> tool is a run time error detection tool for
                           CUDA applications. The tool can
                           precisely detect and report out of bounds and misaligned memory accesses made by
                           global, local, shared and global atomic instructions in CUDA
                           applications. It can also detect and report hardware reported error
                           information. In addition, the memcheck tool can detect and report memory
                           leaks in the user application.
                           
                        </p>
                        <p class="p"></p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-error-detection"><a name="supported-error-detection" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-error-detection" name="supported-error-detection" shape="rect">3.2.&nbsp;Supported Error Detection</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The errors that can be reported by the memcheck tool are summarized in the table
                           below. The location column indicates whether the report originates from the
                           host or from the device. The precision of an error is explained in the paragraph
                           below.
                           
                        </p>
                        <div class="tablenoborder"><a name="supported-error-detection__memcheck-error-types" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="supported-error-detection__memcheck-error-types" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 6. Memcheck reported error types</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e992" rowspan="1" colspan="1">Name</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e995" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e998" rowspan="1" colspan="1">Location</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e1001" rowspan="1" colspan="1">Precision</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e1004" rowspan="1" colspan="1">See also</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e992" rowspan="1" colspan="1"><dfn class="term">Memory access error</dfn></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e995" rowspan="1" colspan="1">
                                       Errors due to
                                       out of bounds or misaligned accesses to memory by a global,
                                       local, shared or global atomic access.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e998" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1001" rowspan="1" colspan="1">Precise</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1004" rowspan="1" colspan="1"><a class="xref" href="index.html#memory-access-error-reporting" shape="rect">Memory Access Error Reporting</a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e992" rowspan="1" colspan="1"><dfn class="term">Hardware exception</dfn></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e995" rowspan="1" colspan="1">
                                       Errors that are reported
                                       by the hardware error reporting mechanism.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e998" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1001" rowspan="1" colspan="1">Imprecise</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1004" rowspan="1" colspan="1"><a class="xref" href="index.html#hardware-exception-reporting" shape="rect">Hardware Exception Reporting</a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e992" rowspan="1" colspan="1"><dfn class="term">Malloc/Free errors</dfn></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e995" rowspan="1" colspan="1">
                                       Errors that occur due to incorrect
                                       use of <samp class="ph codeph">malloc()/free()</samp>
                                       in CUDA kernels.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e998" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1001" rowspan="1" colspan="1">Precise</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1004" rowspan="1" colspan="1"><a class="xref" href="index.html#device-side-allocation-checking" shape="rect">Device Side Allocation Checking</a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e992" rowspan="1" colspan="1"><dfn class="term">CUDA API errors</dfn></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e995" rowspan="1" colspan="1">
                                       Reported when a CUDA API call in the application
                                       returns a failure.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e998" rowspan="1" colspan="1">Host</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1001" rowspan="1" colspan="1">Precise</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1004" rowspan="1" colspan="1"><a class="xref" href="index.html#api-error-checking" shape="rect">CUDA API Error Checking</a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e992" rowspan="1" colspan="1"><dfn class="term">cudaMalloc memory leaks</dfn></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e995" rowspan="1" colspan="1">
                                       Allocations of device memory using <samp class="ph codeph">cudaMalloc()</samp>
                                       that have not been freed by the application.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e998" rowspan="1" colspan="1">Host</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1001" rowspan="1" colspan="1">Precise</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1004" rowspan="1" colspan="1"><a class="xref" href="index.html#leak-checking" shape="rect">Leak Checking</a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e992" rowspan="1" colspan="1"><dfn class="term">Device Heap Memory Leaks</dfn></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e995" rowspan="1" colspan="1">
                                       Allocations of device memory using <samp class="ph codeph">malloc()</samp>
                                       in device code that have not been freed by the application.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e998" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1001" rowspan="1" colspan="1">Imprecise</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e1004" rowspan="1" colspan="1"><a class="xref" href="index.html#device-side-allocation-checking" shape="rect">Device Side Allocation Checking</a></td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">
                           The memcheck tool reports two classes of errors:
                           <dfn class="term">precise</dfn> and <dfn class="term">imprecise</dfn>.
                           
                        </p>
                        <p class="p"><dfn class="term">Precise</dfn> errors in memcheck are those that the tool can uniquely
                           identify and gather all information for.
                           For these errors, memcheck can report the block and thread coordinates
                           of the thread causing the failure, the PC of the instruction performing the
                           access, as well as the address being accessed and its size and type. If the CUDA
                           application contains line number information (by either being compiled with device
                           side debugging information, or with line information), then the tool will also
                           print the source file and line number of the erroneous access.
                           
                        </p>
                        <p class="p"><dfn class="term">Imprecise</dfn> errors are errors reported by the hardware
                           error reporting mechanism that could not be precisely attributed to a particular
                           thread. The precision of the error varies based on the type of the error
                           and in many cases, memcheck may not be able to attribute the cause
                           of the error back to the source file and line. Imprecise error reporting
                           is only supported on SM 2.0 and higher GPUs.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="using-memcheck"><a name="using-memcheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#using-memcheck" name="using-memcheck" shape="rect">3.3.&nbsp;Using Memcheck</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           The memcheck tool is enabled by default when running the
                           CUDA-MEMCHECK application. It can also be explicitly enabled by using
                           the <samp class="ph codeph">--tool memcheck</samp> option.
                           <pre class="pre screen" xml:space="preserve">
<strong class="ph b">cuda-memcheck [memcheck_options] app_name [app_options]</strong>
</pre></div>
                        <p class="p">
                           When run in this way, the memcheck tool will look for precise, imprecise,
                           malloc/free and CUDA API errors. The reporting of device leaks must be explicitly
                           enabled.
                           Errors identified by the memcheck tool are displayed on the screen after
                           the application has completed execution. See
                           <a class="xref" href="index.html#understanding-memcheck-errors" shape="rect">Understanding Memcheck Errors</a> for more information about
                           how to interpret the messages printed by the tool.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="understanding-memcheck-errors"><a name="understanding-memcheck-errors" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#understanding-memcheck-errors" name="understanding-memcheck-errors" shape="rect">3.4.&nbsp;Understanding Memcheck Errors</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The memcheck tool can produce a variety of different errors. This is a short
                           guide showing some samples of errors and explaining how the information
                           in each error report can be interpreted.
                           
                        </p>
                        <ol class="ol">
                           <li class="li">
                              <div class="p"><dfn class="term">Memory access error</dfn>: Memory access errors are generated for errors
                                 that the memcheck tool can correctly attribute to the erroneous
                                 instruction. Below is an example of a precise memory access error:
                                 <pre class="pre screen" xml:space="preserve">
========= Invalid __global__ write of size 4
=========     at 0x00000060 in memcheck_demo.cu:6:unaligned_kernel(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x400100001 is misaligned
</pre></div>
                              <div class="p">
                                 Let us examine this error line by line :
                                 <pre class="pre screen" xml:space="preserve">Invalid __global__ write of size 4</pre>
                                 
                                 The first line shows the memory segment, type and size being accessed.
                                 The memory segment is one of :
                                 <ul class="ul">
                                    <li class="li">__global__ : for device global memory</li>
                                    <li class="li">__shared__ : for per block shared memory</li>
                                    <li class="li">__local__  : for per thread local memory</li>
                                 </ul>
                                 
                                 In this case, the access was to device global memory.
                                 The next field contains information about the type of access,
                                 whether it was a read or a write. In this case, the access is a write.
                                 Finally, the last item is the size of the access in bytes. In this
                                 example, the access was 4 bytes in size.
                                 
                              </div>
                              <div class="p"><pre class="pre screen" xml:space="preserve">at 0x00000060 in memcheck_demo.cu:6:unaligned_kernel(void)</pre>
                                 
                                 The second line contains the PC of the instruction, the source file
                                 and line number (if available) and the CUDA kernel name.
                                 In this example, the instruction causing the access was at
                                 PC 0x60 inside the <samp class="ph codeph">unaligned_kernel</samp> CUDA kernel.
                                 Additionally, since the application was compiled with line number
                                 information, this instruction corresponds to line 6 in the memcheck_demo.cu
                                 source file.
                                 
                              </div>
                              <div class="p"><pre class="pre screen" xml:space="preserve">by thread (0,0,0) in block (0,0,0)</pre>
                                 
                                 The third line contains the thread indices and block indices of the
                                 thread on which the error was hit.
                                 In this example, the thread doing the erroneous access was
                                 the first thread in the first block.
                                 </div>
                              <div class="p"><pre class="pre screen" xml:space="preserve">Address 0x400100001 is misaligned</pre>
                                 
                                 The fourth line contains the memory address being accessed and the type
                                 of access error. The type of access error can either be out of bounds
                                 access or misaligned access.
                                 In this example, the access was to address 0x400100001 and the access
                                 error was because this address was not aligned correctly.
                                 </div>
                              <p class="p"></p>
                           </li>
                           <li class="li">
                              <p class="p"><dfn class="term">Hardware exception</dfn>: Imprecise errors are generated for errors that
                                 the hardware reports to the memcheck tool. Hardware exceptions have a variety
                                 of formats and messages. Typically, the first line will provide some information
                                 about the type of error encountered.
                                 
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li">
                              <div class="p"><dfn class="term">Malloc/free error</dfn>: Malloc/free errors refer to the errors in the
                                 invocation of device side <samp class="ph codeph">malloc()/free()</samp> in CUDA kernels.
                                 An example of a malloc/free error :
                                 <pre class="pre screen" xml:space="preserve">
========= Malloc/Free error encountered : Double free
=========     at 0x000079d8
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x400aff920
</pre></div>
                              <div class="p">
                                 We can examine this line by line.
                                 <pre class="pre screen" xml:space="preserve">Malloc/Free error encountered : Double free</pre>
                                 
                                 The first line indicates that this is a malloc/free error, and contains
                                 the type of error. This type can be :
                                 <ul class="ul">
                                    <li class="li">Double free : This indicates that the thread called
                                       <samp class="ph codeph">free()</samp> on an allocation that
                                       has already been freed.
                                       
                                    </li>
                                    <li class="li">Invalid pointer to free : This indicates that <samp class="ph codeph">free</samp>
                                       was called
                                       on a pointer that was not returned
                                       by <samp class="ph codeph">malloc()</samp></li>
                                    <li class="li">Heap corruption : This indicates generalized heap corruption,
                                       or cases where the state of the heap
                                       was modified in a way that memcheck
                                       did not expect
                                       
                                    </li>
                                 </ul>
                                 
                                 
                                 In this example, the error is due to calling <samp class="ph codeph">free()</samp>
                                 on a pointer which had already been freed.
                                 
                              </div>
                              <div class="p"><pre class="pre screen" xml:space="preserve">at 0x000079d8</pre>
                                 
                                 The second line gives the PC on GPU where the error was reported.
                                 This PC is usually inside of system code, and is not interesting
                                 to the user. The device frame backtrace will contain the location
                                 in user code where the <samp class="ph codeph">malloc()/free()</samp> call was
                                 made.
                                 
                              </div>
                              <div class="p"><pre class="pre screen" xml:space="preserve">by thread (0,0,0) in block (0,0,0)</pre>
                                 
                                 The third line contains the thread and block indices of the thread
                                 that caused this error. In this example, the thread has threadIdx = (0,0,0)
                                 and blockIdx = (0,0,0).
                                 </div>
                              <div class="p"><pre class="pre screen" xml:space="preserve">Address 0x400aff920</pre>
                                 
                                 This line contains the value of the pointer passed to <samp class="ph codeph">free()</samp>
                                 or returned by <samp class="ph codeph">malloc()</samp></div>
                              <p class="p"></p>
                           </li>
                           <li class="li">
                              <div class="p"><dfn class="term">Leak errors</dfn>: Errors are reported for allocations
                                 created using cudaMalloc and for allocations on the device heap that were
                                 not freed when the CUDA context was destroyed.
                                 An example of a cudaMalloc allocation leak report follows :
                                 <pre class="pre screen" xml:space="preserve">
========= Leaked 64 bytes at 0x400200200
</pre>
                                 
                                 The error message reports information about the size of the allocation
                                 that was leaked as well as the address of the allocation on the device.
                                 </div>
                              <div class="p">
                                 A device heap leak message will be explicitly identified as such:
                                 <pre class="pre screen" xml:space="preserve">
========= Leaked 16 bytes at 0x4012ffff6 on the device heap
</pre></div>
                              <p class="p"></p>
                           </li>
                           <li class="li">
                              <div class="p"><dfn class="term">CUDA API error</dfn>: CUDA API errors are reported for CUDA
                                 API calls that return an error value. An example of a CUDA API error:
                                 <pre class="pre screen" xml:space="preserve">
========= Program hit error 11 on CUDA API call to cudaMemset
</pre>
                                 
                                 The message contains the returned value of the CUDA API call, as well as
                                 the name of the API function that was called.
                                 </div>
                           </li>
                        </ol>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="integrated-mode"><a name="integrated-mode" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#integrated-mode" name="integrated-mode" shape="rect">3.5.&nbsp;Integrated Mode</a></h3>
                     <div class="body conbody">
                        <div class="p">You can execute the memcheck tool from within CUDA-GDB by using the
                           following option before running the application:
                           <pre class="pre screen" xml:space="preserve">(cuda-gdb) <strong class="ph b">set cuda memcheck on</strong></pre></div>
                        <p class="p">
                           In integrated mode, the memcheck tool improves the precision of error
                           reporting by CUDA-GDB. The memory access checks are enabled, allowing
                           identification of the thread that may be causing a warp or
                           device level exception.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="api-error-checking"><a name="api-error-checking" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#api-error-checking" name="api-error-checking" shape="rect">3.6.&nbsp;CUDA API Error Checking</a></h3>
                     <div class="body conbody">
                        <p class="p"> The memcheck tool supports reporting an error if a CUDA API call made by the user
                           program returned an error. The tool supports this detection for both
                           CUDA run time and CUDA driver API calls. In all cases, if the API function
                           call has a nonzero return value, CUDA-MEMCHECK will print an error message
                           containing the name of the API call that failed and the return value of the
                           API call.
                           
                        </p>
                        <p class="p">
                           CUDA API error reports do not terminate the application; they merely provide
                           extra information. It is up to the application to check the
                           return status of CUDA API calls and handle error conditions appropriately.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="device-side-allocation-checking"><a name="device-side-allocation-checking" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#device-side-allocation-checking" name="device-side-allocation-checking" shape="rect">3.7.&nbsp;Device Side Allocation Checking</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           On SM 2.0 and higher GPUs, the <dfn class="term">memcheck</dfn> tool checks accesses to
                           allocations in the device heap.
                           
                        </p>
                        <p class="p">
                           These allocations are created by calling <samp class="ph codeph">malloc()</samp> inside a kernel.
                           This feature is implicitly enabled and can be disabled by specifying the
                           <samp class="ph codeph">--check-device-heap no</samp> option. This
                           feature is only activated for kernels in the application that call
                           <samp class="ph codeph">malloc()</samp>.
                           
                        </p>
                        <p class="p">
                           The current implementation does not require space on the device heap, and so
                           the heap allocation behavior of the program with and without memcheck
                           should remain similar. The <dfn class="term">memcheck</dfn> tool does require space in
                           device global memory to track these heap allocations and will print an internal
                           error message if it is not able to allocate this space in device global memory.
                           
                        </p>
                        <div class="p">
                           In addition to access checks, the <dfn class="term">memcheck</dfn> tool can now perform
                           libc style checks on the <samp class="ph codeph">malloc()/free()</samp> calls. The tool
                           will report an error if the application calls <samp class="ph codeph">free()</samp> twice
                           on the same allocation in a kernel, or if it calls <samp class="ph codeph">free()</samp> on an invalid pointer.
                           
                           <div class="note note"><span class="notetitle">Note:</span> Make sure to look at the device side backtrace to find the location
                              in the application where the <samp class="ph codeph">malloc()/free()</samp> call was
                              made.
                           </div>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="leak-checking"><a name="leak-checking" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#leak-checking" name="leak-checking" shape="rect">3.8.&nbsp;Leak Checking</a></h3>
                     <div class="body conbody">
                        <p class="p">The <dfn class="term">memcheck</dfn> tool can detect leaks of allocated memory.
                        </p>
                        <p class="p">Memory leaks are device side allocations that have not been freed by the time
                           the context is destroyed. The <dfn class="term">memcheck</dfn> tool tracks  device memory
                           allocations created using the CUDA driver or runtime APIs. Starting in CUDA 5,
                           allocations that are created dynamically on the device heap by calling
                           <samp class="ph codeph">malloc()</samp> inside a kernel are also tracked.
                        </p>
                        <p class="p">For an accurate leak checking summary to be generated, the application's
                           CUDA context must be destroyed at the end. This can be done explicitly by
                           calling <samp class="ph codeph">cuCtxDestroy()</samp> in applications using the CUDA driver
                           API, or by calling
                           <samp class="ph codeph">cudaDeviceReset()</samp> in applications programmed against the CUDA
                           run time API.
                        </p>
                        <p class="p">The <samp class="ph codeph">--leak-check full</samp> option must be specified to enable
                           leak checking.
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="racecheck-tool"><a name="racecheck-tool" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#racecheck-tool" name="racecheck-tool" shape="rect">4.&nbsp;Racecheck Tool</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="what-is-racecheck"><a name="what-is-racecheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#what-is-racecheck" name="what-is-racecheck" shape="rect">4.1.&nbsp;What is Racecheck ?</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The <dfn class="term">racecheck</dfn> tool is a run time shared memory data access hazard
                           detector. The primary use of this tool is to help identify memory access
                           race conditions in CUDA applications that use shared memory.
                           
                        </p>
                        <p class="p">
                           In CUDA applications, storage declared with the <samp class="ph codeph">__shared__</samp>
                           qualifier is placed in on-chip <dfn class="term">shared memory</dfn>. All threads in a
                           thread block can access this per-block shared memory. Shared memory goes out of
                           scope when the thread block completes execution. As shared memory is on-chip,
                           it is frequently used for inter-thread communication and as a temporary
                           buffer to hold data being processed. As this data is being accessed by
                           multiple threads in parallel, incorrect program assumptions may result
                           in data races.
                           Racecheck is a tool built to identify these hazards and help users write
                           programs free of shared memory races.
                           
                        </p>
                        <p class="p">
                           Currently, this tool only supports detecting accesses to on-chip shared memory.
                           For supported architectures, see <a class="xref" href="index.html#supported-devices" shape="rect">Supported Devices</a>.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="what-are-hazards"><a name="what-are-hazards" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#what-are-hazards" name="what-are-hazards" shape="rect">4.2.&nbsp;What are Hazards?</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           A <dfn class="term">data access hazard</dfn> is a case where two threads
                           attempt to access the same location in memory resulting in nondeterministic
                           behavior, based on the relative order of the two accesses.
                           These hazards cause <dfn class="term">data races</dfn> where the behavior or the output
                           of the application depends on the order in which all parallel threads are
                           executed by the hardware. Race conditions manifest as intermittent application
                           failures or as failures when attempting to run a working application on a
                           different GPU.
                           
                        </p>
                        <div class="p">
                           The racecheck tool identifies three types of canonical hazards in a program.
                           These are :
                           
                           <ul class="ul">
                              <li class="li"> Write-After-Write (<dfn class="term">WAW</dfn>) hazards
                                 
                                 <p class="p"> This hazard occurs when two threads attempt to
                                    write data to the same memory location. The resulting value
                                    in that location depends on the relative order of the two
                                    accesses.
                                    
                                 </p>
                              </li>
                              <li class="li"> Write-After-Read (<dfn class="term">WAR</dfn>) hazards
                                 
                                 <p class="p"> This hazard occurs when two threads access the same memory location,
                                    with one thread performing a read and another a write. In
                                    this case, the writing thread is ordered before the reading
                                    thread and the value returned to the reading thread is
                                    not the original value at the memory location.
                                    
                                 </p>
                              </li>
                              <li class="li"> Read-After-Write (<dfn class="term">RAW</dfn>) hazards
                                 
                                 <p class="p"> This hazard occurs when two threads access the same memory
                                    location, with one thread performing a read and the other a write.
                                    In this case, the reading thread reads the value before the
                                    writing thread commits it.
                                    
                                 </p>
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="using-racecheck"><a name="using-racecheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#using-racecheck" name="using-racecheck" shape="rect">4.3.&nbsp;Using Racecheck</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           The racecheck tool is enabled by running the CUDA-MEMCHECK application
                           with the <samp class="ph codeph">--tool racecheck</samp> option.
                           <pre class="pre screen" xml:space="preserve">
<strong class="ph b">cuda-memcheck --tool racecheck [memcheck_options] app_name [app_options]</strong>
</pre></div>
                        <p class="p">
                           Once racecheck has identified a hazard, the user can make program modifications
                           to ensure this hazard is no longer present.
                           In the case of Write-After-Write hazards, the program should be modified
                           so that multiple writes are not happening to the same location.
                           In the case of Read-After-Write and Write-After-Read hazards, the reading
                           and writing locations should be deterministically ordered. In CUDA kernels,
                           this can be achieved by inserting a <samp class="ph codeph">__syncthreads()</samp> call
                           between the two accesses.
                           
                        </p>
                        <div class="note note"><span class="notetitle">Note:</span> The racecheck tool does not perform any memory access error checking.
                           It is recommended that users first run the memcheck tool to ensure the
                           application is free of errors.
                           
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="racecheck-report-modes"><a name="racecheck-report-modes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#racecheck-report-modes" name="racecheck-report-modes" shape="rect">4.4.&nbsp;Racecheck report modes</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           The racecheck tool can produce two types of output :
                           
                           <ul class="ul">
                              <li class="li"><dfn class="term">Hazard</dfn> reports
                                 
                                 <p class="p"> These reports contain detailed information about one particular hazard.
                                    Each hazard report is byte accurate and represents information about
                                    conflicting accesses between two threads that affect this byte of
                                    shared memory.
                                    
                                 </p>
                              </li>
                              <li class="li"><dfn class="term">Analysis</dfn> reports
                                 
                                 <p class="p"> These reports contain a post analysis set of reports. These reports
                                    are produced by the racecheck tool by analysing multiple hazard
                                    reports and examining active device state.
                                    For example usage of analysis reports, see <a class="xref" href="index.html#understanding-racecheck-analysis-reports" shape="rect">Understanding Racecheck Analysis Reports</a>.
                                    
                                 </p>
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="understanding-racecheck-analysis-reports"><a name="understanding-racecheck-analysis-reports" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#understanding-racecheck-analysis-reports" name="understanding-racecheck-analysis-reports" shape="rect">4.5.&nbsp;Understanding Racecheck Analysis Reports</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           In <dfn class="term">analysis</dfn> reports, the racecheck tool produces a series of high level
                           messages that identify the source locations of a particular race, based on observed
                           hazards and other machine state.
                           
                        </p>
                        <div class="p">
                           A sample racecheck analysis report is below:
                           
                           <pre class="pre screen" xml:space="preserve">
========= Race reported between Write access at 0x00000050 in raceGroupBasic.cu:53:WAW(void)
=========     and Write access at 0x00000050 in raceGroupBasic.cu:53:WAW(void)
</pre></div>
                        <p class="p">
                           The analysis record contains high level information about the hazard that is conveyed to
                           the end user. Each line contains information about a unique location in the application
                           which is participating in the race.
                           
                        </p>
                        <div class="p">
                           The first line contains the type of access. The access can be either a :
                           
                           <ul class="ul">
                              <li class="li">Read</li>
                              <li class="li">Write</li>
                           </ul>
                           
                           
                           The next item on the line is the PC of the location where the access happened from. In
                           this case, the PC is 0x50. If the application was compiled with line number
                           information, this line will also contain the file name and line number of the access.
                           Finally, the line contains the kernel name of the kernel containing the access.
                           
                        </div>
                        <p class="p">
                           A given analysis report will always contain at least one line which is performing a write
                           access. A common strategy to eliminate races which contain only write accesses is to
                           ensure that the write access is performed by only one thread. In the case of races
                           with multiple readers and one writer, introducing explicit program ordering
                           via a <samp class="ph codeph">__syncthreads()</samp> call can avoid the race condition.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="understanding-racecheck-hazard-reports"><a name="understanding-racecheck-hazard-reports" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#understanding-racecheck-hazard-reports" name="understanding-racecheck-hazard-reports" shape="rect">4.6.&nbsp;Understanding Racecheck Hazard Reports</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           In <dfn class="term">hazard</dfn> reporting mode, the racecheck tool produces a series of messages
                           detailing information about hazards in the application. The tool is byte accurate and
                           produces a message for each byte on which a hazard was detected. Additionally, when enabled,
                           the host backtrace for the launch of the kernel will also be displayed.
                           
                        </p>
                        <div class="p">
                           A sample racecheck hazard is below:
                           <pre class="pre screen" xml:space="preserve">
========= ERROR: Potential WAW hazard detected at __shared__ 0x0 in block (0, 0, 0) :
=========     Write Thread (0, 0, 0) at 0x00000088 in raceWAW.cu:18:WAW(void)
=========     Write Thread (1, 0, 0) at 0x00000088 in raceWAW.cu:18:WAW(void)
=========     Current Value : 0, Incoming Value : 2
</pre></div>
                        <p class="p">
                           The hazard records are dense and capture a lot of interesting information.
                           In general terms, the first line contains information about the hazard:
                           its severity, type and address, as well as information about the thread
                           block where it occurred.
                           The next 2 lines contain detailed information about the two threads that were
                           in contention. These two lines are ordered chronologically, so the first entry
                           is for the access that occurred earlier and the second for the access that
                           occurred later. The final line is printed for some hazard types and captures
                           the actual data that was being written.
                           
                        </p>
                        <div class="p">
                           Examining this line by line, we have :
                           <pre class="pre screen" xml:space="preserve">ERROR: Potential WAW hazard detected at __shared__ 0x0 in block (0, 0, 0)</pre></div>
                        <div class="p">
                           The first word on this line indicates the severity of this hazard. This can be
                           
                           <ul class="ul">
                              <li class="li"><dfn class="term">INFO</dfn>  : The lowest level of severity. This is for hazards
                                 that have no impact on program execution and hence are not contributing to
                                 data access hazards. It is still a good idea to find and eliminate
                                 such hazards.
                                 
                              </li>
                              <li class="li"><dfn class="term">WARNING</dfn> : Hazards at this level of severity are determined
                                 to be programming model hazards, but may be intentionally created
                                 by the programmer. Examples are hazards due to warp level
                                 programming that makes the assumption that threads are proceeding
                                 in groups. Such hazards are typically only encountered by advanced
                                 programmers. In cases where a beginner programmer encounters such
                                 errors, they should be treated as sources of hazards.
                                 
                              </li>
                              <li class="li"><dfn class="term">ERROR</dfn> : The highest level of severity. Corresponds to hazards that are
                                 very likely candidates for causing data access races. Programmers
                                 would be well advised to examine errors at this level of severity.
                                 
                              </li>
                           </ul>
                           
                           In this case, the message is at the ERROR level of severity.
                           
                        </div>
                        <div class="p">
                           The next piece of information here is the type of hazard. The racecheck tool
                           detects three types of hazards:
                           
                           <ul class="ul">
                              <li class="li">WAW or Write-After-Write hazards</li>
                              <li class="li">WAR or Write-After-Read hazards</li>
                              <li class="li">RAW or Read-After-Write hazards</li>
                           </ul>
                           
                           The type of hazard indicates the access types of the two threads that were in
                           contention. In this example, the hazard is of Write-After-Write type.
                           
                        </div>
                        <p class="p">
                           The next piece of information is the address in shared memory that was being
                           accessed. This is the offset in per block shared memory that was being accessed
                           by both threads. Since the racecheck tool is byte accurate, the message is only
                           for the byte of memory at given address. In this example, the byte being accessed
                           is byte 0x0 in shared memory.
                           
                        </p>
                        <p class="p">
                           Finally, the first line contains the block index of the thread block to which
                           the two racing threads belong.
                           
                        </p>
                        <div class="p">
                           The second line contains information about the first thread to write to this
                           location.
                           <pre class="pre screen" xml:space="preserve">Write Thread (0, 0, 0) at 0x00000088 in raceWAW.cu:18:WAW(void)</pre>
                           
                           
                           The first item on this line indicates the type of access being performed by
                           this thread to the shared memory address. In this example, the thread was
                           writing to the location.
                           The next component is the index of the thread in the thread block. In this case,
                           the thread is at index (0,0,0). Following this, we have the byte offset of the
                           instruction which did the access in the kernel. In this example, the offset
                           is 0x88. This is followed by the source file and line number
                           (if line number information is available). The
                           final item on this line is the name of the kernel that was being executed.
                           </div>
                        <p class="p">
                           The third line contains similar information about the second thread
                           which was causing this hazard. This line has an identical format to
                           the previous line.
                           
                        </p>
                        <div class="p">
                           The fourth line contains information about the data in the two accesses.
                           <pre class="pre screen" xml:space="preserve">Current Value : 0, Incoming Value : 2</pre>
                           
                           If the second thread in the hazard was performing a write access, i.e.
                           the hazard is a Write-After-Write (WAW) or a Write-After-Read (WAR),
                           this line contains the value after the access by the first thread
                           as the <dfn class="term">Current Value</dfn> and the value that will be written
                           by the second access as the <dfn class="term">Incoming Value</dfn>.
                           In this case, the first thread wrote the value 0 to the shared memory location.
                           The second thread is attempting to write the value 2.
                           
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="cuda-memcheck-features"><a name="cuda-memcheck-features" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#cuda-memcheck-features" name="cuda-memcheck-features" shape="rect">5.&nbsp;CUDA-MEMCHECK Features</a></h2>
                  <div class="topic concept nested1" id="nonblocking-mode"><a name="nonblocking-mode" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nonblocking-mode" name="nonblocking-mode" shape="rect">5.1.&nbsp;Nonblocking Mode</a></h3>
                     <div class="body conbody">
                        <p class="p"> By default, on SM 2.0 and higher GPUs the standalone CUDA-MEMCHECK tool will launch
                           kernels in nonblocking mode. This allows the tool to support error reporting in
                           applications running concurrent kernels.
                           
                        </p>
                        <p class="p">
                           To force kernels to execute serially, a user can use the
                           <samp class="ph codeph">--force-blocking-launches yes</samp> option. Blocking launch mode is
                           always enabled on Mac OS X 10.6 and on Windows XP. This flag has no effect on
                           GPUs less than SM 2.0. One side effect is that when in blocking mode, only the
                           first thread to hit an error in a kernel will be reported.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="stack-backtraces"><a name="stack-backtraces" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#stack-backtraces" name="stack-backtraces" shape="rect">5.2.&nbsp;Stack Backtraces</a></h3>
                     <div class="body conbody">
                        <p class="p">In standalone mode, CUDA-MEMCHECK can generate backtraces when given the
                           <samp class="ph codeph">--show-backtrace</samp> option. Backtraces usually consist of
                           two sections - a saved host backtrace that leads up to the CUDA driver
                           call site, and a device backtrace at the time of the error. Each backtrace
                           contains a list of function calls showing the state of the stack at the
                           time the backtrace was created.
                           
                        </p>
                        <p class="p">To get function names in the host backtraces, the user application must be
                           built with support for symbol information in the host application. For more
                           information, see <a class="xref" href="index.html#compilation-options" shape="rect">Compilation Options</a>.</p>
                        <p class="p">
                           In CUDA 5, the host stack backtrace will show a maximum of 61 frames.
                           
                        </p>
                        <p class="p">
                           Backtraces are printed for most CUDA-MEMCHECK tool outputs, and the information
                           generated varies depending on the type of output. The table below explains the
                           kind of host and device backtrace seen under different conditions.
                           
                        </p>
                        <div class="tablenoborder"><a name="stack-backtraces__cuda-memcheck-stack-backtrace-information" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="stack-backtraces__cuda-memcheck-stack-backtrace-information" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 7. CUDA-MEMCHECK Stack Backtrace Information</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="25%" id="d54e1898" rowspan="1" colspan="1">Output Type</th>
                                    <th class="entry" valign="top" width="37.5%" id="d54e1901" rowspan="1" colspan="1">Host Backtrace</th>
                                    <th class="entry" valign="top" width="37.5%" id="d54e1904" rowspan="1" colspan="1">Device Backtrace</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">Memory access error</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Kernel launch on host</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">Precise backtrace on device</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">Hardware exception</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Kernel launch on host</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">Imprecise backtrace on device
                                       <a name="fnsrc_1" href="#fntarg_1" shape="rect"><sup>1</sup></a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">Malloc/Free error</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Kernel launch on host</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">Precise backtrace on device</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">cudaMalloc allocation leak</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Callsite of cudaMalloc</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">N/A</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">CUDA API error</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Callsite of CUDA API call</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">N/A</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">CUDA-MEMCHECK internal error</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Callsite leading to internal error</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">N/A</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">Device heap allocation leak</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">N/A</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" headers="d54e1898" rowspan="1" colspan="1">Shared memory hazard</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1901" rowspan="1" colspan="1">Kernel launch on host</td>
                                    <td class="entry" valign="top" width="37.5%" headers="d54e1904" rowspan="1" colspan="1">N/A</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="name-demangling"><a name="name-demangling" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#name-demangling" name="name-demangling" shape="rect">5.3.&nbsp;Name Demangling</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The CUDA-MEMCHECK suite now supports displaying mangled and demangled names for
                           CUDA kernels and CUDA device functions.
                           By default, tools display the fully demangled name, which contains the name
                           of the kernel as well as its prototype information. In the simple demangle
                           mode, the tools will only display the first part of the name. If demangling
                           is disabled, tools will display the complete mangled name of the kernel.
                           
                        </p>
                        <p class="p"></p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="dynamic-parallelism"><a name="dynamic-parallelism" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#dynamic-parallelism" name="dynamic-parallelism" shape="rect">5.4.&nbsp;Dynamic Parallelism</a></h3>
                     <div class="body conbody">
                        <p class="p">The CUDA-MEMCHECK tool suite supports dynamic parallelism. The
                           <dfn class="term">memcheck</dfn> tool supports precise error reporting of out of bounds
                           and misaligned accesses on global, local and shared memory accesses as well as
                           on global atomic instructions for applications using dynamic parallelism.
                           In addition, the imprecise hardware exception reporting mechanism is also fully
                           supported. Error detection on applications using dynamic parallelism requires
                           significantly more memory on the device and as a result, in memory constrained
                           environments, <dfn class="term">memcheck</dfn> may fail to initialize with an internal out
                           of memory error.
                           
                        </p>
                        <p class="p">
                           For limitations, see <a class="xref" href="index.html#known-issues" shape="rect">Known Issues</a>.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="error-actions"><a name="error-actions" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#error-actions" name="error-actions" shape="rect">5.5.&nbsp;Error Actions</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           On encountering an error, CUDA-MEMCHECK behavior depends on the type of error.
                           The default behavior of CUDA-MEMCHECK is to continue execution on purely host
                           side errors. Hardware exceptions detected by the memcheck tool cause the
                           CUDA context to be destroyed. Precise errors (such as memory access and
                           malloc/free errors) detected by the memcheck tool cause the kernel to be terminated.
                           This terminates the kernel without running any subsequent instructions and the
                           application continues launching other kernels in the CUDA context.
                           The handling of memory access and malloc/free errors detected by the memcheck tool
                           can be changed using the <samp class="ph codeph">--destroy-on-device-error</samp> option.
                           
                        </p>
                        <p class="p">
                           For racecheck detected hazards, the hazard is reported, but execution is
                           not affected.
                           
                        </p>
                        <div class="p">
                           For a full summary of error actions based on the type of the error, see the
                           table below. The error action <dfn class="term">terminate kernel</dfn> refers to the
                           cases where the kernel is terminated early, and no subsequent instructions
                           are run. In such cases, the CUDA context is not destroyed and other kernels
                           continue execution and CUDA API calls can still be made.
                           
                           <div class="note note"><span class="notetitle">Note:</span> 
                              When kernel execution is terminated early, the application may not have
                              completed its computations on data. Any subsequent kernels that depend
                              on this data will have undefined behavior.
                              
                           </div>
                           
                           The action <dfn class="term">terminate CUDA context</dfn> refers to the cases where the
                           CUDA context is forcibly terminated. In such cases, all outstanding work for
                           the context is terminated and subsequent CUDA API calls will fail.
                           The action <dfn class="term">continue application</dfn> refers to cases where the
                           application execution is not impacted, and the kernel continues executing
                           instructions.
                           
                        </div>
                        <p class="p"></p>
                        <div class="tablenoborder"><a name="error-actions__cuda-memcheck-error-action" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="error-actions__cuda-memcheck-error-action" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 8. CUDA-MEMCHECK Error Actions</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="30.76923076923077%" id="d54e2108" rowspan="1" colspan="1">Error Type</th>
                                    <th class="entry" valign="top" width="15.384615384615385%" id="d54e2111" rowspan="1" colspan="1">Location</th>
                                    <th class="entry" valign="top" width="23.076923076923077%" id="d54e2114" rowspan="1" colspan="1">Action</th>
                                    <th class="entry" valign="top" width="30.76923076923077%" id="d54e2117" rowspan="1" colspan="1">Comments</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">Memory access error</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Terminate kernel</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">User can choose to instead terminate the CUDA context</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">Hardware exception</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Terminate CUDA context</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">Subsequent calls on the CUDA context will fail</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">Malloc/Free error</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Terminate kernel</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">User can choose to instead terminate the CUDA context</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">cudaMalloc allocation leak</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Host</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Continue application</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">Error reported. No other action taken.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">CUDA API error</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Host</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Continue application</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">Error reported. No other action taken.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">CUDA-MEMCHECK internal error</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Host</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Undefined</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">The application may behave in an undefined fashion</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">Device heap allocation leak</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Continue application</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">Error reported. No other action taken.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2108" rowspan="1" colspan="1">Shared memory hazard</td>
                                    <td class="entry" valign="top" width="15.384615384615385%" headers="d54e2111" rowspan="1" colspan="1">Device</td>
                                    <td class="entry" valign="top" width="23.076923076923077%" headers="d54e2114" rowspan="1" colspan="1">Continue application</td>
                                    <td class="entry" valign="top" width="30.76923076923077%" headers="d54e2117" rowspan="1" colspan="1">Error reported. No other action taken.</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="escape-sequences"><a name="escape-sequences" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#escape-sequences" name="escape-sequences" shape="rect">5.6.&nbsp;Escape Sequences</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           The <samp class="ph codeph">--save</samp> and <samp class="ph codeph">--log-file</samp> options to CUDA-MEMCHECK
                           accept the following escape sequences in the file name.
                           
                           <ul class="ul">
                              <li class="li">%% : Replaced with a literal %
                                 
                              </li>
                              <li class="li">%p : Replaced with the PID of the CUDA-MEMCHECK frontend application.
                                 
                              </li>
                              <li class="li">%q{ENVVAR} : Replaced with the contents of the environment variable 'ENVVAR'.
                                 If the variable does not exist, this is replaced with an empty string.
                                 
                              </li>
                              <li class="li"> Any other character following the % causes an error.
                                 
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="specifying-filters"><a name="specifying-filters" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#specifying-filters" name="specifying-filters" shape="rect">5.7.&nbsp;Specifying Filters</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           CUDA-MEMCHECK tools support filtering the choice of kernels which should be
                           checked. When a filter is specified, only kernels matching the filter
                           will be checked. Filters are specified using the <samp class="ph codeph">--filter</samp> option.
                           By default, CUDA-MEMCHECK tools will check all kernels in the application.
                           
                        </p>
                        <p class="p">
                           The <samp class="ph codeph">--filter</samp> option can be specified multiple times. If a kernel
                           satisfies any filter, it will be checked by the running CUDA-MEMCHECK tool.
                           
                        </p>
                        <p class="p">
                           The <samp class="ph codeph">--filter</samp> option takes a filter specification consisting of a list of
                           comma-separated key-value pairs, specified as <samp class="ph codeph">key=value</samp>. In order for
                           a filter to be matched, all components of the filter specification must be
                           satisfied. If a filter is incorrectly specified in any component, the entire
                           filter is ignored. For a full summary of valid key values, see the table below.
                           If a key has multiple strings, any of the strings can be used to specify that
                           filter component.
                           
                        </p>
                        <div class="tablenoborder"><a name="specifying-filters__cuda-memcheck-filter-keys" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="specifying-filters__cuda-memcheck-filter-keys" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 9. CUDA-MEMCHECK Filter Keys</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="21.428571428571427%" id="d54e2331" rowspan="1" colspan="1">Name</th>
                                    <th class="entry" valign="top" width="21.428571428571427%" id="d54e2334" rowspan="1" colspan="1">Key String</th>
                                    <th class="entry" valign="top" width="28.57142857142857%" id="d54e2337" rowspan="1" colspan="1">Value</th>
                                    <th class="entry" valign="top" width="28.57142857142857%" id="d54e2340" rowspan="1" colspan="1">Comments</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2331" rowspan="1" colspan="1">Kernel Name</td>
                                    <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2334" rowspan="1" colspan="1">kernel-name, kne</td>
                                    <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2337" rowspan="1" colspan="1">Complete mangled kernel name</td>
                                    <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2340" rowspan="1" colspan="1">User specifies the complete mangled kernel name.
                                       Cannot be included in the same filter specification as kernel-substring.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2331" rowspan="1" colspan="1">Kernel Substring</td>
                                    <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2334" rowspan="1" colspan="1">kernel-substring, kns</td>
                                    <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2337" rowspan="1" colspan="1">Any substring in mangled kernel name</td>
                                    <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2340" rowspan="1" colspan="1">User specifies a substring in the mangled kernel name.
                                       Cannot be included in the same filter specification as kernel-name.
                                       
                                    </td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">
                           When using the <samp class="ph codeph">kernel-name</samp> or <samp class="ph codeph">kernel-substring</samp> filters, CUDA-MEMCHECK tools
                           will check all <samp class="ph codeph">device</samp> function calls made by the kernel. When using CUDA Dynamic Parallelism (CDP),
                           CUDA-MEMCHECK tools will not check child kernels launched from a checked kernel unless the child kernel matches a filter.
                           If a GPU-launched kernel that does not match a filter calls a device function that is reachable from a kernel that does
                           match a filter, the device function will behave as though it was checked. In the case of some tools, this can result
                           in undefined behavior.
                           
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="os-specific-behavior"><a name="os-specific-behavior" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#os-specific-behavior" name="os-specific-behavior" shape="rect">6.&nbsp;Operating System Specific Behavior</a></h2>
                  <div class="body conbody">
                     <p class="p">This section describes operating system specific behavior.</p>
                  </div>
                  <div class="topic concept nested1" id="os-specific-windows"><a name="os-specific-windows" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#os-specific-windows" name="os-specific-windows" shape="rect">6.1.&nbsp;Windows Specific Behavior</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li">
                              <p class="p">Timeout Detection and Recovery (TDR)</p>
                              <p class="p"> On Windows Vista and above, GPUs have a timeout associated with them. GPU applications
                                 that take longer than the threshold (default of 2 seconds) will be killed by the operating system.
                                 Since CUDA-MEMCHECK tools increase the runtime of kernels, it is possible for a CUDA kernel
                                 to exceed the timeout and therefore be terminated due to the TDR mechanism.
                                 
                              </p>
                              <p class="p">
                                 For the purposes of debugging, the number of seconds before the timeout is hit
                                 can be modified by setting the timeout value in seconds in the DWORD registry key
                                 <samp class="ph codeph">TdrDelay</samp> at
                                 <samp class="ph codeph">HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\GraphicsDrivers</samp></p>
                              <p class="p">
                                 More information about the registry keys to control the Timeout Detection and Recovery
                                 mechanism is available from MSDN at
                                 <a class="xref" href="http://msdn.microsoft.com/en-us/library/windows/hardware/ff569918%28v=vs.85%29.aspx" target="_blank" shape="rect">http://msdn.microsoft.com/en-us/library/windows/hardware/ff569918%28v=vs.85%29.aspx</a></p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="os-specific-android"><a name="os-specific-android" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#os-specific-android" name="os-specific-android" shape="rect">6.2.&nbsp;Android Specific Behavior</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li">
                              <p class="p">TMPDIR environment variable</p>
                              <p class="p">
                                 On Android, CUDA-MEMCHECK requires that the user specify
                                 a path to a directory that is readable and writable by
                                 the current user in the TMPDIR environment variable.
                                 
                              </p>
                           </li>
                           <li class="li">
                              <p class="p">Host stack backtraces</p>
                              <p class="p">
                                 Host side function call stack backtraces are disabled
                                 on Android.
                                 
                              </p>
                           </li>
                           <li class="li">
                              <p class="p">Android GUI</p>
                              <p class="p">
                                 To ensure the GPU kernel is not terminated unexpectedly,
                                 the Android UI can be stopped by using the "stop" command
                                 in the <samp class="ph codeph">adb</samp> shell.
                                 
                              </p>
                           </li>
                           <li class="li">
                              CUDA-MEMCHECK tools cannot be used with APK binaries.
                              
                           </li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="cuda-fortran-support"><a name="cuda-fortran-support" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#cuda-fortran-support" name="cuda-fortran-support" shape="rect">7.&nbsp;CUDA Fortran Support</a></h2>
                  <div class="body conbody">
                     <p class="p">This section describes support for CUDA Fortran.</p>
                  </div>
                  <div class="topic concept nested1" id="unique_1124504307"><a name="unique_1124504307" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#unique_1124504307" name="unique_1124504307" shape="rect">CUDA Fortran Specific Behavior</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li">By default, error reports printed by CUDA-MEMCHECK contain 0-based C style values for thread index (threadIdx) and block index
                              (blockIdx).
                              For CUDA-MEMCHECK tools to use Fortran style 1-based offsets, use the <samp class="ph codeph">--language fortran</samp> option.
                              
                           </li>
                           <li class="li">The CUDA Fortran compiler may insert extra padding in shared memory. Accesses hitting this extra padding may not be reported
                              as an error.
                              
                           </li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="cuda-memcheck-tool-examples"><a name="cuda-memcheck-tool-examples" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#cuda-memcheck-tool-examples" name="cuda-memcheck-tool-examples" shape="rect">8.&nbsp;CUDA-MEMCHECK Tool Examples</a></h2>
                  <div class="topic concept nested1" id="example-use-of-memcheck"><a name="example-use-of-memcheck" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#example-use-of-memcheck" name="example-use-of-memcheck" shape="rect">8.1.&nbsp;Example Use of Memcheck</a></h3>
                     <div class="body conbody">
                        <div class="section">
                           <div class="p">
                              This section presents a walk-through of running the memcheck tool from
                              CUDA-MEMCHECK on a simple application called <samp class="ph codeph">memcheck_demo</samp>.
                              
                              <div class="note note"><span class="notetitle">Note:</span> Depending on the SM type of your GPU, your system output may vary.
                              </div>
                           </div>
                        </div>
                        <div class="example">
                           <h3 class="title sectiontitle">memcheck_demo.cu source code</h3><pre xml:space="preserve">#include &lt;stdio.h&gt;

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> x;

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__global__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> unaligned_kernel(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {
    *(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span>*) ((<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">char</span>*)&amp;x + 1) = 42;
}

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> out_of_bounds_function(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {
    *(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span>*) 0x87654320 = 42;
}

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__global__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> out_of_bounds_kernel(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {
    out_of_bounds_function();
}

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> run_unaligned(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Running unaligned_kernel\n"</span>);
    unaligned_kernel<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">&lt;&lt;&lt;</span>1,1<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">&gt;&gt;&gt;</span>();
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Ran unaligned_kernel: %s\n"</span>,
    cudaGetErrorString(cudaGetLastError()));
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Sync: %s\n"</span>, cudaGetErrorString(cudaThreadSynchronize()));
}

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> run_out_of_bounds(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Running out_of_bounds_kernel\n"</span>);
    out_of_bounds_kernel<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">&lt;&lt;&lt;</span>1,1<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">&gt;&gt;&gt;</span>();
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Ran out_of_bounds_kernel: %s\n"</span>,
    cudaGetErrorString(cudaGetLastError()));
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Sync: %s\n"</span>, cudaGetErrorString(cudaThreadSynchronize()));
}

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> main() {
    <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> *devMem;

    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"Mallocing memory\n"</span>);
    cudaMalloc((<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>**)&amp;devMem, 1024);

    run_unaligned();
    run_out_of_bounds();

    cudaDeviceReset();
    cudaFree(devMem);

    <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">return</span> 0;
}</pre></div>
                        <div class="section">
                           <div class="p">
                              This application is compiled for release builds as:
                              <pre class="pre screen" xml:space="preserve">
nvcc -gencode arch=compute_20,code=sm_20 -o memcheck_demo memcheck_demo.cu
</pre></div>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="memcheck-demo-output"><a name="memcheck-demo-output" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#memcheck-demo-output" name="memcheck-demo-output" shape="rect">8.1.1.&nbsp;<samp class="ph codeph">memcheck_demo</samp> Output
                              </a></h3>
                        <div class="body conbody">
                           <p class="p">When a CUDA application causes access violations, the kernel launch may
                              terminate with an error code of unspecified launch failure, or a subsequent
                              cudaThreadSynchronize call may fail with an error code of unspecified
                              launch failure.
                              
                           </p>
                           <p class="p">This sample application causes two failures, but there is no way to detect
                              where these kernels are causing the access violations, as illustrated in the
                              following output:
                              
                           </p><pre class="pre screen" xml:space="preserve"><strong class="ph b">$ ./memcheck_demo</strong>
Mallocing memory
Running unaligned_kernel
Ran unaligned_kernel: no error
Sync: unspecified launch failure
Running out_of_bounds_kernel
Ran out_of_bounds_kernel: unspecified launch failure
Sync: unspecified launch failure
</pre></div>
                     </div>
                     <div class="topic concept nested2" id="memcheck-demo-output-with-memcheck-release-build"><a name="memcheck-demo-output-with-memcheck-release-build" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#memcheck-demo-output-with-memcheck-release-build" name="memcheck-demo-output-with-memcheck-release-build" shape="rect">8.1.2.&nbsp;<samp class="ph codeph">memcheck_demo</samp> Output with Memcheck (Release Build)</a></h3>
                        <div class="body conbody">
                           <p class="p">In this case, since the application is built in release mode, the
                              CUDA-MEMCHECK output contains only the kernel names from the application causing
                              the access violation.  Though the kernel name and error type are detected, there
                              is no line number information on the failing kernel. Also included in the output
                              are the host and device backtraces for the call sites where the functions were
                              launched. In addition, CUDA API errors are reported, such as the invalid
                              <samp class="ph codeph">cudaFree()</samp> call in the application.
                              
                           </p><pre class="pre screen" xml:space="preserve">
<strong class="ph b">$ cuda-memcheck ./memcheck_demo</strong>
========= CUDA-MEMCHECK
Mallocing memory
Running unaligned_kernel
Ran unaligned_kernel: no error
Sync: no error
Running out_of_bounds_kernel
Ran out_of_bounds_kernel: no error
Sync: no error
========= Invalid __global__ write of size 4
=========     at 0x00000028 in unaligned_kernel(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x400100001 is misaligned
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/local/lib/libcuda.so (cuLaunchKernel + 0x3ae) [0xddbee]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xcd27]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaLaunch + 0x1bb) [0x3778b]
=========     Host Frame:memcheck_demo [0xdfc]
=========     Host Frame:memcheck_demo [0xc76]
=========     Host Frame:memcheck_demo [0xc81]
=========     Host Frame:memcheck_demo [0xb03]
=========     Host Frame:memcheck_demo [0xc27]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0x9b9]
=========
========= Invalid __global__ write of size 4
=========     at 0x00000010 in out_of_bounds_kernel(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0xffffffff87654320 is out of bounds
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/local/lib/libcuda.so (cuLaunchKernel + 0x3ae) [0xddbee]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xcd27]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaLaunch + 0x1bb) [0x3778b]
=========     Host Frame:memcheck_demo [0xdfc]
=========     Host Frame:memcheck_demo [0xca0]
=========     Host Frame:memcheck_demo [0xcab]
=========     Host Frame:memcheck_demo [0xbbc]
=========     Host Frame:memcheck_demo [0xc2c]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0x9b9]
=========
========= Program hit error 17 on CUDA API call to cudaFree 
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:/usr/local/lib/libcuda.so [0x28f850]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaFree + 0x20d) [0x364ed]
=========     Host Frame:memcheck_demo [0xc3d]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0x9b9]
=========
========= ERROR SUMMARY: 3 errors

</pre></div>
                     </div>
                     <div class="topic concept nested2" id="memcheck-demo-output-with-memcheck-debug-build"><a name="memcheck-demo-output-with-memcheck-debug-build" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#memcheck-demo-output-with-memcheck-debug-build" name="memcheck-demo-output-with-memcheck-debug-build" shape="rect">8.1.3.&nbsp;<samp class="ph codeph">memcheck_demo</samp> Output with Memcheck (Debug Build)</a></h3>
                        <div class="body conbody">
                           <div class="p">The application is now built with device-side debug information and function
                              symbols as follows:
                              <pre class="pre screen" xml:space="preserve">
nvcc -G -Xcompiler -rdynamic -gencode arch=compute_20,code=sm_20 -o memcheck_demo memcheck_demo.cu
</pre></div>
                           <p class="p">Now run this application with CUDA-MEMCHECK and check the output. By default, the
                              application will run so that the kernel is terminated on memory access errors
                              but other work in the CUDA context can still proceed.
                              
                           </p>
                           <p class="p"> In the output below the first kernel no longer reports an unspecified launch failure
                              as its execution has been terminated early after CUDA-MEMCHECK detected the error.
                              The application continued to run the second kernel. The error detected in the
                              second kernel causes it to terminate early. Finally, the application calls
                              <samp class="ph codeph">cudaDeviceReset()</samp>, which destroys the CUDA context and then
                              attempts to call <samp class="ph codeph">cudaFree()</samp>. This call returns an API error
                              that is caught and displayed by memcheck.
                              
                           </p><pre class="pre screen" xml:space="preserve">
<strong class="ph b">$ cuda-memcheck ./memcheck_demo</strong>
========= CUDA-MEMCHECK
Mallocing memory
Running unaligned_kernel
Ran unaligned_kernel: no error
Sync: no error
Running out_of_bounds_kernel
Ran out_of_bounds_kernel: no error
Sync: no error
========= Invalid __global__ write of size 4
=========     at 0x00000028 in memcheck_demo.cu:6:unaligned_kernel(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x400100001 is misaligned
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/local/lib/libcuda.so (cuLaunchKernel + 0x3ae) [0xddbee]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xcd27]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaLaunch + 0x1bb) [0x3778b]
=========     Host Frame:memcheck_demo (_Z10cudaLaunchIcE9cudaErrorPT_ + 0x18) [0x11a4]
=========     Host Frame:memcheck_demo (_Z35__device_stub__Z16unaligned_kernelvv + 0x1d) [0x101d]
=========     Host Frame:memcheck_demo (_Z16unaligned_kernelv + 0x9) [0x1028]
=========     Host Frame:memcheck_demo (_Z13run_unalignedv + 0x76) [0xeaa]
=========     Host Frame:memcheck_demo (main + 0x28) [0xfce]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xd79]
=========
========= Invalid __global__ write of size 4
=========     at 0x00000028 in memcheck_demo.cu:10:out_of_bounds_function(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x87654320 is out of bounds
=========     Device Frame:memcheck_demo.cu:15:out_of_bounds_kernel(void) (out_of_bounds_kernel(void) : 0x10)
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/local/lib/libcuda.so (cuLaunchKernel + 0x3ae) [0xddbee]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xcd27]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaLaunch + 0x1bb) [0x3778b]
=========     Host Frame:memcheck_demo (_Z10cudaLaunchIcE9cudaErrorPT_ + 0x18) [0x11a4]
=========     Host Frame:memcheck_demo (_Z39__device_stub__Z20out_of_bounds_kernelvv + 0x1d) [0x1047]
=========     Host Frame:memcheck_demo (_Z20out_of_bounds_kernelv + 0x9) [0x1052]
=========     Host Frame:memcheck_demo (_Z17run_out_of_boundsv + 0x76) [0xf63]
=========     Host Frame:memcheck_demo (main + 0x2d) [0xfd3]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xd79]
=========
========= Program hit error 17 on CUDA API call to cudaFree
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:/usr/local/lib/libcuda.so [0x28f850]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaFree + 0x20d) [0x364ed]
=========     Host Frame:memcheck_demo (main + 0x3e) [0xfe4]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xd79]
=========
========= ERROR SUMMARY: 3 errors
</pre></div>
                     </div>
                     <div class="topic concept nested2" id="leak-checking-in-cuda-memcheck"><a name="leak-checking-in-cuda-memcheck" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#leak-checking-in-cuda-memcheck" name="leak-checking-in-cuda-memcheck" shape="rect">8.1.4.&nbsp;Leak Checking in CUDA-MEMCHECK</a></h3>
                        <div class="body conbody">
                           <p class="p">To print information about the allocations that have not been freed at the time
                              the CUDA context is destroyed, we can specify the <samp class="ph codeph">--leak-check full</samp>
                              option to CUDA-MEMCHECK.
                           </p>
                           <p class="p">When running the program with the leak check option, the user is presented with
                              a list of allocations that were not destroyed, along with the size of the allocation
                              and the address on the device of the allocation. For allocations made on the host,
                              each leak report will also print a backtrace corresponding to the saved host stack
                              at the time the allocation was first made. Also presented is a summary of the total
                              number of bytes leaked and the corresponding number of allocations.
                           </p>
                           <p class="p">In this example, the program created an allocation using
                              <samp class="ph codeph">cudaMalloc()</samp> and has not called <samp class="ph codeph">cudaFree()</samp>
                              to release it, leaking memory. Notice that CUDA-MEMCHECK still prints errors
                              it encountered while running the application.
                           </p><pre class="pre screen" xml:space="preserve">
<strong class="ph b">$ cuda-memcheck --leak-check full memcheck_demo</strong>
========= CUDA-MEMCHECK
Mallocing memory
Running unaligned_kernel
Ran unaligned_kernel: no error
Sync: no error
Running out_of_bounds_kernel
Ran out_of_bounds_kernel: no error
Sync: no error
========= Invalid __global__ write of size 4
=========     at 0x00000060 in memcheck_demo.cu:6:unaligned_kernel(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x400100001 is misaligned
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/local/lib/libcuda.so (cuLaunchKernel + 0x3ae) [0xddbee]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xcd27]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaLaunch + 0x1bb) [0x3778b]
=========     Host Frame:memcheck_demo (_Z10cudaLaunchIcE9cudaErrorPT_ + 0x18) [0x122c]
=========     Host Frame:memcheck_demo (_Z35__device_stub__Z16unaligned_kernelvv + 0x1d) [0x10a6]
=========     Host Frame:memcheck_demo (_Z16unaligned_kernelv + 0x9) [0x10b1]
=========     Host Frame:memcheck_demo (_Z13run_unalignedv + 0x76) [0xf33]
=========     Host Frame:memcheck_demo (main + 0x28) [0x1057]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xde9]
=========
========= Invalid __global__ write of size 4
=========     at 0x00000028 in memcheck_demo.cu:10:out_of_bounds_function(void)
=========     by thread (0,0,0) in block (0,0,0)
=========     Address 0x87654320 is out of bounds
=========     Device Frame:memcheck_demo.cu:15:out_of_bounds_kernel(void) (out_of_bounds_kernel(void) : 0x10)
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/local/lib/libcuda.so (cuLaunchKernel + 0x3ae) [0xddbee]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xcd27]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaLaunch + 0x1bb) [0x3778b]
=========     Host Frame:memcheck_demo (_Z10cudaLaunchIcE9cudaErrorPT_ + 0x18) [0x122c]
=========     Host Frame:memcheck_demo (_Z39__device_stub__Z20out_of_bounds_kernelvv + 0x1d) [0x10d0]
=========     Host Frame:memcheck_demo (_Z20out_of_bounds_kernelv + 0x9) [0x10db]
=========     Host Frame:memcheck_demo (_Z17run_out_of_boundsv + 0x76) [0xfec]
=========     Host Frame:memcheck_demo (main + 0x2d) [0x105c]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xde9]
=========
========= Leaked 1024 bytes at 0x400200000
=========     Saved host backtrace up to driver entry point at cudaMalloc time
=========     Host Frame:/usr/local/lib/libcuda.so (cuMemAlloc_v2 + 0x236) [0xe9746]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0x26dd7]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 [0xb37b]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaMalloc + 0x17a) [0x36e6a]
=========     Host Frame:memcheck_demo (main + 0x23) [0x1052]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xde9]
=========
========= Program hit error 17 on CUDA API call to cudaFree 
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:/usr/local/lib/libcuda.so [0x28f850]
=========     Host Frame:/usr/local/lib/libcudart.so.5.0 (cudaFree + 0x20d) [0x364ed]
=========     Host Frame:memcheck_demo (main + 0x3e) [0x106d]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1eb1d]
=========     Host Frame:memcheck_demo [0xde9]
=========
========= LEAK SUMMARY: 1024 bytes leaked in 1 allocations
========= ERROR SUMMARY: 3 errors
</pre></div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="integrated-cuda-memcheck-example"><a name="integrated-cuda-memcheck-example" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#integrated-cuda-memcheck-example" name="integrated-cuda-memcheck-example" shape="rect">8.2.&nbsp;Integrated CUDA-MEMCHECK Example</a></h3>
                     <div class="body conbody">
                        <p class="p">This example shows how to enable CUDA-MEMCHECK from within CUDA-GDB and how
                           to detect errors within the debugger so you can access the line number
                           information and check the state of the variables.
                           
                        </p>
                        <p class="p">In this example the unaligned kernel has a misaligned memory access in
                           block 1 lane 1, which gets trapped as an illegal lane address at line 6
                           from within CUDA-GDB. Note that CUDA-GDB displays the address and
                           memory space that caused the bad access.
                           
                        </p><pre class="pre screen" xml:space="preserve">
(cuda-gdb) <strong class="ph b">set cuda memcheck on</strong>
(cuda-gdb) <strong class="ph b">run</strong>
Starting program: memcheck_demo 
[Thread debugging using libthread_db enabled]
Mallocing memory
[New Thread 0x7ffff6fe1710 (LWP 7783)]
[Context Create of context 0x6218a0 on Device 0]
[Launch of CUDA Kernel 0 (memset32_post&lt;&lt;&lt;(1,1,1),(64,1,1)&gt;&gt;&gt;) on Device 0]
Running unaligned_kernel
[Launch of CUDA Kernel 1 (unaligned_kernel&lt;&lt;&lt;(1,1,1),(1,1,1)&gt;&gt;&gt;) on Device 0]
Memcheck detected an illegal access to address (@global)0x400100001

Program received signal CUDA_EXCEPTION_1, Lane Illegal Address.
[Switching focus to CUDA kernel 1, grid 2, block (0,0,0), thread (0,0,0), device 0, sm 0, warp 0, lane 0]
0x000000000078b8b0 in unaligned_kernel&lt;&lt;&lt;(1,1,1),(1,1,1)&gt;&gt;&gt; () at memcheck_demo.cu:6
6           *(int*) ((char*)&amp;x + 1) = 42;
(cuda-gdb) <strong class="ph b">print &amp;x</strong>
$1 = (@global int *) 0x400100000
(cuda-gdb) <strong class="ph b">continue</strong>
Continuing.
[Termination of CUDA Kernel 1 (unaligned_kernel&lt;&lt;&lt;(1,1,1),(1,1,1)&gt;&gt;&gt;) on Device 0]
[Termination of CUDA Kernel 0 (memset32_post&lt;&lt;&lt;(1,1,1),(64,1,1)&gt;&gt;&gt;) on Device 0]

Program terminated with signal CUDA_EXCEPTION_1, Lane Illegal Address.
The program no longer exists.
(cuda-gdb)
</pre></div>
                  </div>
               </div>
               <div class="topic concept nested0" id="memory-access-error-reporting"><a name="memory-access-error-reporting" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#memory-access-error-reporting" name="memory-access-error-reporting" shape="rect">A.&nbsp;Memory Access Error Reporting</a></h2>
                  <div class="body conbody">
                     <div class="p">The memcheck tool will report memory access errors when run standalone
                        
                        or in integrated mode with CUDA-GDB. The table below describes the types of
                        
                        accesses that are checked and the SM version where such checks happen.
                        
                        
                        
                        <div class="tablenoborder"><a name="memory-access-error-reporting__memcheck-memory-access-error-detection-support" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="memory-access-error-reporting__memcheck-memory-access-error-detection-support" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 10. Memcheck memory access error detection support</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2741" rowspan="1" colspan="1">Error Type</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e2744" rowspan="1" colspan="1">SM 1.x</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e2747" rowspan="1" colspan="1">SM 2.x</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e2750" rowspan="1" colspan="1">SM 3.0</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e2753" rowspan="1" colspan="1">SM 3.5</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2741" rowspan="1" colspan="1">Global</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2744" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2747" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2750" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2753" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2741" rowspan="1" colspan="1">Shared</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2744" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2747" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2750" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2753" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2741" rowspan="1" colspan="1">Local</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2744" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2747" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2750" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2753" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2741" rowspan="1" colspan="1">Global Atomic</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2744" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2747" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2750" rowspan="1" colspan="1">Yes</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2753" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2741" rowspan="1" colspan="1">Load through texture</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2744" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2747" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2750" rowspan="1" colspan="1">N/A</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e2753" rowspan="1" colspan="1">Yes</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="hardware-exception-reporting"><a name="hardware-exception-reporting" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#hardware-exception-reporting" name="hardware-exception-reporting" shape="rect">B.&nbsp;Hardware Exception Reporting</a></h2>
                  <div class="body conbody">
                     <p class="p">The CUDA-MEMCHECK tool will report hardware exceptions when run as a
                        
                        standalone tool or as part of CUDA-GDB. The table below enumerates the
                        
                        supported exceptions, their precision and scope, as well as a brief
                        
                        description of their cause. For more detailed information, see the
                        
                        documentation for CUDA-GDB.
                        
                        
                     </p>
                     <div class="tablenoborder"><a name="hardware-exception-reporting__cuda-exception-codes" shape="rect">
                           <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="hardware-exception-reporting__cuda-exception-codes" class="table" frame="border" border="1" rules="all">
                           <caption><span class="tablecap">Table 11. CUDA Exception Codes</span></caption>
                           <thead class="thead" align="left">
                              <tr class="row">
                                 <th class="entry" valign="top" width="28.57142857142857%" id="d54e2885" rowspan="1" colspan="1">Exception code</th>
                                 <th class="entry" valign="top" width="14.285714285714285%" id="d54e2888" rowspan="1" colspan="1">Precision of the Error</th>
                                 <th class="entry" valign="top" width="21.428571428571427%" id="d54e2891" rowspan="1" colspan="1">Scope of the Error</th>
                                 <th class="entry" valign="top" width="35.714285714285715%" id="d54e2894" rowspan="1" colspan="1">Description</th>
                              </tr>
                           </thead>
                           <tbody class="tbody">
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_1: "Lane Illegal Address"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Per lane/thread error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when a thread accesses an illegal (out of bounds) global address.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_2: "Lane User Stack Overflow"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Per lane/thread error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when a thread exceeds its stack memory limit.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_3: "Device Hardware Stack Overflow" </td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Global error on the GPU</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when the application triggers a global hardware stack overflow. The main cause of this error is large amounts
                                    of divergence in the presence of function calls.
                                 </td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_4: "Warp Illegal Instruction"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Warp error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread within a warp has executed an illegal instruction.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_5: "Warp Out-of-range Address"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Warp error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread within a warp accesses an address that is outside the valid range of local or shared memory regions.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_6: "Warp Misaligned Address"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Warp error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread within a warp accesses an address in the local or shared memory segments that is not correctly
                                    aligned.
                                 </td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_7: "Warp Invalid Address Space"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Warp error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread within a warp executes an instruction that accesses a memory space not permitted for that instruction.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_8: "Warp Invalid PC"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Warp error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread within a warp advances its PC beyond the 40-bit address space.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_9: "Warp Hardware Stack Overflow"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Warp error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread in a warp triggers a hardware stack overflow. This should be a rare occurrence.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_10: "Device Illegal Address"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Global error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when a thread accesses an illegal (out of bounds) global address.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_11: "Lane Misaligned Address"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Per lane/thread error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when a thread accesses a global address that is not correctly aligned.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_12: "Warp Assert"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Per warp</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when any thread in the warp hits a device side assertion.</td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">CUDA_EXCEPTION_13: "Lane Syscall Error"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Per lane</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">This occurs when a particular thread causes a syscall error, such as calling <samp class="ph codeph">free()</samp> in a kernel on an already freed pointer.
                                 </td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="28.57142857142857%" headers="d54e2885" rowspan="1" colspan="1">"Unknown Exception"</td>
                                 <td class="entry" valign="top" width="14.285714285714285%" headers="d54e2888" rowspan="1" colspan="1">Not precise</td>
                                 <td class="entry" valign="top" width="21.428571428571427%" headers="d54e2891" rowspan="1" colspan="1">Global error</td>
                                 <td class="entry" valign="top" width="35.714285714285715%" headers="d54e2894" rowspan="1" colspan="1">The precise cause of the exception is unknown. Potentially, this may be due to Device Hardware Stack overflows or a kernel
                                    generating an exception very close to its termination.
                                 </td>
                              </tr>
                           </tbody>
                        </table>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="release-notes"><a name="release-notes" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#release-notes" name="release-notes" shape="rect">C.&nbsp;Release Notes</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="new-features-6.5"><a name="new-features-6.5" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#new-features-6.5" name="new-features-6.5" shape="rect">C.1.&nbsp;New Features in 6.5</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"> More information printed for API errors
                              
                           </li>
                           <li class="li"> Support for escape sequences in file name to <samp class="ph codeph">--log-file</samp> and <samp class="ph codeph">--save</samp>.
                              
                           </li>
                           <li class="li"> Support for controlling which kernels are checked using <samp class="ph codeph">--filter</samp>. For more information see <a class="xref" href="index.html#specifying-filters" shape="rect">Specifying Filters</a>.
                              
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="new-features-6.0"><a name="new-features-6.0" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#new-features-6.0" name="new-features-6.0" shape="rect">C.2.&nbsp;New Features in 6.0</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"> Support for Unified Memory
                              
                           </li>
                           <li class="li"> Support for CUDA Multi Process Service (MPS)
                              
                           </li>
                           <li class="li"> Support for additional error detection with cudaMemcpy and cudaMemset
                              
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="new-features-5.5"><a name="new-features-5.5" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#new-features-5.5" name="new-features-5.5" shape="rect">C.3.&nbsp;New Features in 5.5</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"> Analysis mode in racecheck tool. For more information,
                              		     see <a class="xref" href="index.html#racecheck-tool" shape="rect">Racecheck Tool</a>.</li>
                           <li class="li"> Support for racecheck on SM 3.5 GPUs.
                              
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="new-features-5.0"><a name="new-features-5.0" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#new-features-5.0" name="new-features-5.0" shape="rect">C.4.&nbsp;New Features in 5.0</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li">Reporting of data access hazards in shared memory accesses.
                              		    This is supported on Fermi SM 2.x and Kepler SM 3.0 GPUs. This functionality
                              		    is not supported on Windows XP.
                              		    For more information, see <a class="xref" href="index.html#racecheck-tool" shape="rect">Racecheck Tool</a>.
                              		
                           </li>
                           <li class="li">Support for SM 3.0 and SM 3.5 GPUs. For more information, see
                              <a class="xref" href="index.html#supported-devices" shape="rect">Supported Devices</a>.
                              
                           </li>
                           <li class="li">Support for dynamic parallelism. All memory access error detection
                              is supported for applications using dynamic parallelism.
                              For more information, see <a class="xref" href="index.html#dynamic-parallelism" shape="rect">Dynamic Parallelism</a>.
                              
                           </li>
                           <li class="li">Precise error detection for local loads/stores, shared loads/stores,
                              global atomics/reductions. On SM 3.5, added precise memory access error
                              detection for noncoherent global loads through the texture unit.
                              For more information, see <a class="xref" href="index.html#memory-access-error-reporting" shape="rect">Memory Access Error Reporting</a>.
                              
                           </li>
                           <li class="li">Error detection in device side malloc()/free(), such
                              as double free() or invalid free() on the GPU.
                              For more information, see <a class="xref" href="index.html#device-side-allocation-checking" shape="rect">Device Side Allocation Checking</a>.
                              
                           </li>
                           <li class="li">Leak checking for allocations on the device heap.
                              For more information, see <a class="xref" href="index.html#leak-checking" shape="rect">Leak Checking</a>.
                              
                           </li>
                           <li class="li">Display of a saved stack backtrace on the host
                              and captured backtrace on the device for different errors.
                              For more information, see <a class="xref" href="index.html#stack-backtraces" shape="rect">Stack Backtraces</a>.
                              
                           </li>
                           <li class="li">Reporting of CUDA API errors in the user's application.
                              For more information, see <a class="xref" href="index.html#api-error-checking" shape="rect">CUDA API Error Checking</a>.
                              
                           </li>
                           <li class="li">Added display of mangled, demangled, and full prototype of the kernel.
                              For more information, see <a class="xref" href="index.html#name-demangling" shape="rect">Name Demangling</a>.
                              
                           </li>
                           <li class="li">Increased functionality in integrated mode with CUDA-GDB. Added reporting of
                              the address and address space being accessed that caused a precise exception.
                              Added checking of device side malloc() and free() when in integrated mode.
                              For more information, see <a class="xref" href="index.html#integrated-mode" shape="rect">Integrated Mode</a>.
                              
                           </li>
                           <li class="li">Support for applications compiled separately that use the device
                              side linker.
                              
                           </li>
                           <li class="li">Support for applications compiled with the -lineinfo flag.
                              
                           </li>
                           <li class="li">New style of command line options.
                              For more information, see <a class="xref" href="index.html#command-line-options" shape="rect">Command Line Options</a>.
                              
                           </li>
                           <li class="li">Changed default behavior. CUDA-MEMCHECK will
                              display backtraces by default and will report API errors by default.
                              For more information, see <a class="xref" href="index.html#command-line-options" shape="rect">Command Line Options</a>.
                              
                           </li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="known-issues"><a name="known-issues" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#known-issues" name="known-issues" shape="rect">D.&nbsp;Known Issues</a></h2>
                  <div class="body conbody">
                     <p class="p">The following are known issues with the current release.</p>
                     <ul class="ul">
                        <li class="li">Applications run much slower under CUDA-MEMCHECK tools. This may cause
                           some kernel launches to fail with a launch timeout error when running with
                           CUDA-MEMCHECK enabled.
                           
                        </li>
                        <li class="li">On Windows XP, the standalone CUDA-MEMCHECK tools will always run in blocking
                           launch mode.
                           
                        </li>
                        <li class="li">When running CUDA-MEMCHECK tools in integrated mode with CUDA-GDB,
                           only the <dfn class="term">memcheck</dfn> tool is enabled. Also, the following
                           features are disabled:
                           
                           <ul class="ul">
                              <li class="li">Nonblocking launches</li>
                              <li class="li">Leak checking</li>
                              <li class="li">API error checking</li>
                           </ul>
                        </li>
                        <li class="li">
                           CUDA-MEMCHECK tools do not support CUDA/Direct3D interop.
                           
                        </li>
                        <li class="li">
                           The memcheck tool does not support CUDA API error checking for
                           API calls made on the GPU using dynamic parallelism.
                           
                        </li>
                        <li class="li">
                           The racecheck tool does not support dynamic parallelism.
                           
                        </li>
                        <li class="li">
                           In cases where a CUDA application spawns child processes that in turn
                           use CUDA, CUDA-MEMCHECK tools may not report errors from the child
                           processes.
                           
                        </li>
                        <li class="li">
                           Tools in the CUDA-MEMCHECK suite cannot interoperate with the following applications:
                           
                           <ul class="ul">
                              <li class="li">Nvidia legacy command line profiler (CUDA_PROFILE/COMPUTE_PROFILE)</li>
                              <li class="li">nvprof</li>
                              <li class="li">Nvidia Visual Profiler</li>
                              <li class="li">Nvidia Nsight Visual Studio Edition</li>
                           </ul>
                           
                           If such tools are detected, CUDA-MEMCHECK will terminate with
                           an internal error that initialization failed. Please make sure that the tools listed
                           above are not running. In case the message persists, make sure the following environment
                           variables are not set:
                           
                           <ul class="ul">
                              <li class="li">COMPUTE_PROFILE</li>
                              <li class="li">CUDA_PROFILE</li>
                              <li class="li">CUDA_INJECTION32_DLL</li>
                              <li class="li">CUDA_INJECTION64_DLL</li>
                           </ul>
                        </li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright-past-to-present" name="copyright-past-to-present" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2007</span>-<span class="ph">2014</span> NVIDIA
                              Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="fn"><a name="fntarg_1" href="#fnsrc_1" shape="rect"><sup>1</sup></a>  In some cases, there may be no device backtrace.
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script></body>
</html>