<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="NVIDIA CUDA Toolkit Release Notes"></meta>
      <meta name="abstract" content="The Release Notes for the CUDA Toolkit."></meta>
      <meta name="description" content="The Release Notes for the CUDA Toolkit."></meta>
      <meta name="DC.Coverage" content="Release Notes"></meta>
      <meta name="DC.subject" content="CUDA Toolkit, CUDA Toolkit 6.0, CUDA Toolkit 6.0 libraries, CUDA Toolkit 6.0 release, CUDA Toolkit 6.0 installation, CUDA Toolkit issues, CUDA Toolkit core files, CUDA Toolkit resolved issues, CUDA Toolkit known issues, CUDA Toolkit documentation"></meta>
      <meta name="keywords" content="CUDA Toolkit, CUDA Toolkit 6.0, CUDA Toolkit 6.0 libraries, CUDA Toolkit 6.0 release, CUDA Toolkit 6.0 installation, CUDA Toolkit issues, CUDA Toolkit core files, CUDA Toolkit resolved issues, CUDA Toolkit known issues, CUDA Toolkit documentation"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>Release Notes :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit 
                  
                  
                  v10.1.168</a></div>
            <div class="category"><a href="index.html" title="Release Notes">Release Notes</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#major-components">1.&nbsp;CUDA Toolkit Major Components</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#title-new-features">2.&nbsp;CUDA Release Notes </a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#cuda-general-new-features">2.1.&nbsp;General CUDA</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#title-new-cuda-tools">2.2.&nbsp;CUDA Tools</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#cuda-compiler-new-features">2.2.1.&nbsp;CUDA Compilers</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-profiler-new-features">2.2.2.&nbsp;CUDA Profiler</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-memcheck-new-features">2.2.3.&nbsp;CUDA-MEMCHECK</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#title-new-cuda-libraries">2.3.&nbsp;CUDA Libraries</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#cublas-new-features">2.3.1.&nbsp;cuBLAS Library</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cusolver-new-features">2.3.2.&nbsp;cuSOLVER Library</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cusparse-new-features">2.3.3.&nbsp;cuSPARSE Library</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cufft-new-features">2.3.4.&nbsp;cuFFT Library</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#curand-new-features">2.3.5.&nbsp;cuRAND Library</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#npp-new-features">2.3.6.&nbsp;NPP Library</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvjpeg-new-features">2.3.7.&nbsp;nvJPEG Library</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#deprecated-features">2.4.&nbsp;Deprecated Features </a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#title-resolved-issues">2.5.&nbsp;Resolved Issues </a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#cuda-compiler-resolved-issues">2.5.1.&nbsp;CUDA Compilers</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-libraries-resolved-issues">2.5.2.&nbsp;CUDA Libraries</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#title-known-issues">2.6.&nbsp;Known Issues</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#cuda-general-known-issues">2.6.1.&nbsp;General CUDA</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-tools-known-issues">2.6.2.&nbsp;CUDA Tools</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-libraries-knonwn-issues">2.6.3.&nbsp;CUDA Libraries</a></div>
                           </li>
                        </ul>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#thrust-release-notes">3.&nbsp;Thrust v1.9.4 Release Notes</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#thrust-new-features">3.1.&nbsp;New Features</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#thrust-new-examples">3.2.&nbsp;New Examples</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#thrust-title-other-enhancements">3.3.&nbsp;Other Enhancements</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#thrust-tagged-pointer-enhancements">3.3.1.&nbsp;Tagged Pointer Enhancements</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#thrust-iterator-enhancements">3.3.2.&nbsp;Iterator Enhancements</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#thrust-testing-enhancements">3.3.3.&nbsp;Testing Enhancements</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#thrust-resolved-issues">3.4.&nbsp;Resolved Issues</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#tegra-cuda-release-notes">4.&nbsp;CUDA Tegra Release Notes</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#tegra-cuda-new-features">4.1.&nbsp;New Features </a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#tegra-cuda-known-issues">4.2.&nbsp;Known Issues and Limitations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#tegra-cuda-resolved-issues">4.3.&nbsp;Resolved Issues</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#tegra-cuda-deprecated-issues">4.4.&nbsp;Deprecated Issues</a></div>
                     </li>
                  </ul>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">Release Notes
                  (<a href="../../pdf/CUDA_Toolkit_Release_Notes.pdf">PDF</a>)
                  -
                   
                  
                  
                  v10.1.168
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated April 24, 2019
                  -
                  <a href="mailto:CUDAIssues@nvidia.com?subject=CUDA Toolkit Documentation Feedback: Release Notes">Send Feedback</a></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#abstract" name="abstract" shape="rect">NVIDIA CUDA Toolkit Release Notes</a></h2>
                  <div class="body conbody">
                     <p class="shortdesc">The Release Notes for the CUDA Toolkit.</p>
                  </div>
               </div>
               <div class="topic concept nested0" id="major-components"><a name="major-components" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#major-components" name="major-components" shape="rect">1.&nbsp;CUDA Toolkit Major Components</a></h2>
                  <div class="body conbody">
                     <p class="p">This section provides an overview of the major components of the CUDA Toolkit and points
                        			to their locations after installation.
                     </p>
                     <dl class="dl">
                        <dt class="dt dlterm">Compiler</dt>
                        <dd class="dd">The CUDA-C and CUDA-C++ compiler, <samp class="ph codeph">nvcc</samp>, is found in the
                           						<samp class="ph codeph">bin/</samp> directory. It is built on top of the NVVM optimizer,
                           					which is itself built on top of the LLVM compiler infrastructure. Developers who
                           					want to target NVVM directly can do so using the Compiler SDK, which is
                           					available in the <samp class="ph codeph">nvvm/</samp> directory.
                        </dd>
                        <dd class="dd">Please note that the following files are compiler-internal and subject to change without any prior notice.<a name="major-components__ul_iyz_wy3_tm" shape="rect">
                              <!-- --></a><ul class="ul" id="major-components__ul_iyz_wy3_tm">
                              <li class="li liexpand">any file in <samp class="ph codeph">include/crt</samp> and <samp class="ph codeph">bin/crt</samp></li>
                              <li class="li liexpand"><samp class="ph codeph">include/common_functions.h</samp>, <samp class="ph codeph">include/device_double_functions.h</samp>, <samp class="ph codeph">include/device_functions.h</samp>, <samp class="ph codeph">include/host_config.h</samp>, <samp class="ph codeph">include/host_defines.h</samp>, and <samp class="ph codeph">include/math_functions.h</samp></li>
                              <li class="li liexpand"><samp class="ph codeph">nvvm/bin/cicc</samp></li>
                              <li class="li liexpand"><samp class="ph codeph">bin/cudafe++</samp>, <samp class="ph codeph">bin/bin2c</samp>, and <samp class="ph codeph">bin/fatbinary</samp></li>
                           </ul>
                        </dd>
                        <dt class="dt dlterm">Tools</dt>
                        <dd class="dd">The following development tools are available in the <samp class="ph codeph">bin/</samp> directory (except
                           					for Nsight Visual Studio Edition (VSE) which is installed as a plug-in to Microsoft
                           					Visual Studio, Nsight Compute and Nsight Systems are available in a separate
                           						directory).<a name="major-components__ul_iyz_wy3_tn" shape="rect">
                              <!-- --></a><ul class="ul" id="major-components__ul_iyz_wy3_tn">
                              <li class="li liexpand">IDEs: <samp class="ph codeph">nsight</samp> (Linux, Mac), Nsight VSE (Windows)
                              </li>
                              <li class="li liexpand">Debuggers: <samp class="ph codeph">cuda-memcheck</samp>, <samp class="ph codeph">cuda-gdb</samp>
                                 							(Linux), Nsight VSE (Windows)
                              </li>
                              <li class="li liexpand">Profilers: Nsight Systems, Nsight Compute, <samp class="ph codeph">nvprof</samp>,
                                 								<samp class="ph codeph">nvvp</samp>, Nsight VSE (Windows)
                              </li>
                              <li class="li liexpand">Utilities: <samp class="ph codeph">cuobjdump</samp>, <samp class="ph codeph">nvdisasm</samp>,
                                 								<samp class="ph codeph">gpu-library-advisor</samp></li>
                           </ul>
                        </dd>
                        <dt class="dt dlterm">Libraries</dt>
                        <dd class="dd">The scientific and utility libraries listed below are available in the <samp class="ph codeph">lib/</samp>
                           					directory (DLLs on Windows are in <samp class="ph codeph">bin/</samp>), and their interfaces
                           					are available in the <samp class="ph codeph">include/</samp> directory.<a name="major-components__ul_ljm_jsj_tm" shape="rect">
                              <!-- --></a><ul class="ul" id="major-components__ul_ljm_jsj_tm">
                              <li class="li liexpand"><samp class="ph codeph">cublas</samp> (BLAS)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cublas_device</samp> (BLAS Kernel Interface)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cuda_occupancy</samp> (Kernel Occupancy Calculation [header file implementation])
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cudadevrt</samp> (CUDA Device Runtime)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cudart</samp> (CUDA Runtime)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cufft</samp> (Fast Fourier Transform [FFT])
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cupti</samp> (CUDA Profiling Tools Interface)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">curand</samp> (Random Number Generation)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cusolver</samp> (Dense and Sparse Direct Linear Solvers and
                                 							Eigen Solvers)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">cusparse</samp> (Sparse Matrix)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvJPEG</samp> (JPEG encoding/decoding)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">npp</samp> (NVIDIA Performance Primitives [image and signal processing])
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvblas</samp> ("Drop-in" BLAS)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvcuvid</samp> (CUDA Video Decoder [Windows, Linux])
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvgraph</samp> (CUDA nvGRAPH [accelerated graph analytics])
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvml</samp> (NVIDIA Management Library)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvrtc</samp> (CUDA Runtime Compilation)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">nvtx</samp> (NVIDIA Tools Extension)
                              </li>
                              <li class="li liexpand"><samp class="ph codeph">thrust</samp> (Parallel Algorithm Library [header file implementation])
                              </li>
                           </ul>
                        </dd>
                        <dt class="dt dlterm">CUDA Samples</dt>
                        <dd class="dd">Code samples that illustrate how to use various CUDA and library APIs are
                           					available in the <samp class="ph codeph">samples/</samp> directory on Linux and Mac, and are
                           					installed to <samp class="ph codeph">C:\ProgramData\NVIDIA Corporation\CUDA Samples</samp> on
                           					Windows. On Linux and Mac, the <samp class="ph codeph">samples/</samp> directory is read-only
                           					and the samples must be copied to another location if they are to be modified.
                           					Further instructions can be found in the <cite class="cite">Getting Started Guides</cite> for
                           					Linux and Mac.
                        </dd>
                        <dt class="dt dlterm">Documentation</dt>
                        <dd class="dd">The most current version of these release notes can be found
                           					online at <a class="xref" href="http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html" target="_blank" shape="rect">http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html</a>. Also, the <samp class="ph codeph">version.txt</samp>
                           					file in the root directory of the toolkit will contain the version and build
                           					number of the installed toolkit.
                        </dd>
                        <dd class="dd">Documentation can be found in PDF form in the <samp class="ph codeph">doc/pdf/</samp>
                           					directory, or in HTML form at <samp class="ph codeph">doc/html/index.html</samp> and online at
                           						<a class="xref" href="http://docs.nvidia.com/cuda/index.html" target="_blank" shape="rect">http://docs.nvidia.com/cuda/index.html</a>.
                        </dd>
                        <dt class="dt dlterm">CUDA Driver</dt>
                        <dd class="dd">Running a CUDA application requires the system with at least one CUDA capable GPU and a driver that is compatible with the
                           CUDA Toolkit. See <a class="xref" href="index.html#major-components__table-cuda-toolkit-driver-versions" shape="rect">Table 1</a>. For more information various GPU products that are CUDA capable, visit <a class="xref" href="https://developer.nvidia.com/cuda-gpus" target="_blank" shape="rect">https://developer.nvidia.com/cuda-gpus</a>. Each release of the CUDA Toolkit requires a minimum version of the CUDA driver. The CUDA driver is backward compatible,
                           meaning that applications compiled against a particular version of the CUDA will continue to work on subsequent (later) driver
                           releases. More information on compatibility can be found at <a class="xref" href="https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#cuda-runtime-and-driver-api-version" target="_blank" shape="rect">https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#cuda-runtime-and-driver-api-version</a>.
                           
                        </dd>
                        <dd class="dd">
                           <div class="tablenoborder"><a name="major-components__table-cuda-toolkit-driver-versions" shape="rect">
                                 <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="major-components__table-cuda-toolkit-driver-versions" class="table" frame="border" border="1" rules="all">
                                 <caption><span class="tablecap">Table 1. CUDA Toolkit and Compatible Driver Versions</span></caption>
                                 <thead class="thead" align="left">
                                    <tr class="row">
                                       <th class="entry" align="center" valign="top" id="d54e397" rowspan="1" colspan="1">CUDA Toolkit</th>
                                       <th class="entry" align="center" valign="top" id="d54e400" rowspan="1" colspan="1">Linux x86_64 Driver Version</th>
                                       <th class="entry" align="center" valign="top" id="d54e403" rowspan="1" colspan="1">Windows x86_64 Driver Version</th>
                                    </tr>
                                 </thead>
                                 <tbody class="tbody">
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 10.1.105</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 418.39</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 418.96</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 10.0.130</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 410.48</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 411.31</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 9.2 (9.2.148 Update 1)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 396.37</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 398.26</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 9.2 (9.2.88)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 396.26</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 397.44</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 9.1 (9.1.85)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 390.46</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 391.29</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 9.0 (9.0.76)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 384.81</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 385.54</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 8.0 (8.0.61 GA2)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 375.26</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 376.51</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 8.0 (8.0.44)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 367.48</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 369.30</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 7.5 (7.5.16)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 352.31</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 353.66</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" headers="d54e397" rowspan="1" colspan="1">CUDA 7.0 (7.0.28)</td>
                                       <td class="entry" valign="top" headers="d54e400" rowspan="1" colspan="1">&gt;= 346.46</td>
                                       <td class="entry" valign="top" headers="d54e403" rowspan="1" colspan="1">&gt;= 347.62</td>
                                    </tr>
                                 </tbody>
                              </table>
                           </div>
                        </dd>
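                         <dd class="dd">
                            <p class="p">The sketch below shows one way an application might verify at run time that the installed driver supports the
                               CUDA runtime it was built against, using the existing <samp class="ph codeph">cudaDriverGetVersion</samp> and
                               <samp class="ph codeph">cudaRuntimeGetVersion</samp> APIs. It is an illustrative example only (the version values in the
                               comments are examples), not part of the toolkit documentation.
                            </p><pre xml:space="preserve" class="pre screen">
// Minimal sketch: compare the installed driver version against the runtime version.
#include &lt;cuda_runtime.h&gt;
#include &lt;cstdio&gt;

int main()
{
    int driverVersion = 0, runtimeVersion = 0;
    cudaDriverGetVersion(&amp;driverVersion);    // e.g. 10010 for a CUDA 10.1 driver
    cudaRuntimeGetVersion(&amp;runtimeVersion);  // version of the runtime linked into the application

    printf("Driver API version:  %d\n", driverVersion);
    printf("Runtime API version: %d\n", runtimeVersion);

    if (driverVersion &lt; runtimeVersion) {
        printf("The installed driver is older than the runtime; "
               "a driver upgrade is required (see Table 1).\n");
        return 1;
    }
    return 0;
}
</pre>
                         </dd>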
                        <dd class="dd"> 
                           For convenience, the NVIDIA driver is installed as part of the CUDA Toolkit installation. Note that this driver is for development
                           purposes and is not recommended for use in production with Tesla GPUs. For running CUDA applications in production with Tesla
                           GPUs, it is recommended to download the latest driver for Tesla GPUs from the NVIDIA driver downloads site at <a class="xref" href="http://www.nvidia.com/drivers" target="_blank" shape="rect">http://www.nvidia.com/drivers</a>. 
                           
                        </dd>
                        <dd class="dd">
                           During the installation of the CUDA Toolkit, the installation of the NVIDIA driver may be skipped on Windows (when using the
                           interactive or silent installation) or on Linux (by using meta packages). For more information on customizing the install
                            process on Windows, see <a class="xref" href="http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#install-cuda-software" target="_blank" shape="rect">http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#install-cuda-software</a>. For meta packages on Linux, see <a class="xref" href="https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#package-manager-metas" target="_blank" shape="rect">https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#package-manager-metas</a>.</dd>
                        <dt class="dt dlterm">CUDA-GDB Sources</dt>
                        <dd class="dd">CUDA-GDB sources are available as follows:</dd>
                        <dd class="dd"><a name="major-components__ul_br5_hgn_lm" shape="rect">
                              <!-- --></a><ul class="ul" id="major-components__ul_br5_hgn_lm">
                              <li class="li liexpand">For CUDA Toolkit 7.0 and newer, in the installation directory <samp class="ph codeph">extras/</samp>.
                                 							The directory is created by default during the toolkit installation
                                 							unless the <samp class="ph codeph">.rpm</samp> or <samp class="ph codeph">.deb</samp> package
                                 							installer is used. In this case, the <samp class="ph codeph">cuda-gdb-src</samp>
                                 							package must be manually installed.
                              </li>
                              <li class="li liexpand">For CUDA Toolkit 6.5, 6.0, and 5.5, at <a class="xref" href="https://github.com/NVIDIA/cuda-gdb" target="_blank" shape="rect">https://github.com/NVIDIA/cuda-gdb</a>.
                              </li>
                              <li class="li liexpand">For CUDA Toolkit 5.0 and earlier, at <a class="xref" href="ftp://download.nvidia.com/CUDAOpen64/" target="_blank" shape="rect">ftp://download.nvidia.com/CUDAOpen64/</a>.
                              </li>
                              <li class="li liexpand">Upon request by sending an e-mail to <a class="xref" href="mailto:oss-requests@nvidia.com" target="_blank" shape="rect">mailto:oss-requests@nvidia.com</a>.
                              </li>
                           </ul>
                        </dd>
                     </dl>
                  </div>
               </div>
               <div class="topic concept nested0" id="title-new-features"><a name="title-new-features" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#title-new-features" name="title-new-features" shape="rect">2.&nbsp;CUDA Release Notes
                        </a></h2>
                  <div class="body conbody">
                     <p class="p">The release notes for the CUDA Toolkit can be found online at <a class="xref" href="http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html" target="_blank" shape="rect">http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html</a>. 
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="cuda-general-new-features"><a name="cuda-general-new-features" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#cuda-general-new-features" name="cuda-general-new-features" shape="rect">2.1.&nbsp;General CUDA</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li liexpand">Introducing NVIDIA® Nsight™ Systems, a system-wide performance analysis tool designed
                              				to visualize an application’s algorithms. This tool will help you identify the largest
                              				opportunities to optimize, and efficiently tune to scale across any quantity or size of
                              				CPUs and GPUs—from a large server to the smallest SoC. See more <a class="xref" href="https://docs.nvidia.com/nsight-systems/" target="_blank" shape="rect">here</a>. 
                           </li>
                           <li class="li liexpand">Added 6.4 version of the Parallel Thread Execution instruction set architecture (ISA).
                              				For more details on new (<samp class="ph codeph">noreturn</samp>, <samp class="ph codeph">mma</samp>) and deprecated
                              				instructions (<samp class="ph codeph">satfinite</samp>, non-sync versions of <samp class="ph codeph">shfl</samp> and
                              					<samp class="ph codeph">vote</samp>), see <a class="xref" href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-isa-version-6-4" target="_blank" shape="rect"><u class="ph u">this section</u></a> in the PTX
                              				documentation.
                           </li>
                           <li class="li liexpand">
                              <p dir="ltr" class="p" id="cuda-general-new-features__docs-internal-guid-240fefc7-7fff-500a-957f-8fa75b4a034c"><a name="cuda-general-new-features__docs-internal-guid-240fefc7-7fff-500a-957f-8fa75b4a034c" shape="rect">
                                    <!-- --></a>The following
                                 					new operating systems are supported by CUDA. See the System Requirements section in
                                  					the NVIDIA CUDA Installation <a class="xref" href="https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html" target="_blank" shape="rect"><u class="ph u">Guide</u></a> for Linux for a full
                                 					list of supported operating systems.
                              </p><a name="cuda-general-new-features__ul_bbb_3vj_sgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="cuda-general-new-features__ul_bbb_3vj_sgb">
                                 <li class="li liexpand" dir="ltr">Ubuntu 18.10</li>
                                 <li class="li liexpand" dir="ltr">RHEL 7.6</li>
                                 <li class="li liexpand" dir="ltr">Fedora 29</li>
                                 <li class="li liexpand" dir="ltr">SUSE SLES 12.4</li>
                                 <li class="li liexpand" dir="ltr">Windows Server 2019</li>
                                 <li class="li liexpand" dir="ltr">Windows 10 (October 2018 Update)</li>
                              </ul>
                           </li>
                           <li class="li liexpand">Improved the scalability of <samp class="ph codeph">cudaFree</samp>* APIs on multi-GPU systems.
                           </li>
                           <li class="li liexpand">Added support for cooperative group kernels (using the
                              					<samp class="ph codeph">cudaLaunchCooperativeKernel</samp> API) with MPS.
                           </li>
                           <li class="li liexpand">Relaxed IPC restrictions so that P2P can be enabled between devices that are not set by
                              				CUDA_VISIBLE_DEVICES.
                           </li>
                           <li class="li liexpand">In CUDA 10.1 the <a class="xref" href="https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c2178246db0a94a430e0038" target="_blank" shape="rect"><u class="ph u"><a class="xref" href="https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c2178246db0a94a430e0038" target="_blank" shape="rect">CUDA Runtime API error codes</a></u></a>
                              				are renumbered to match, wherever possible, their CUDA Driver API equivalents. 
                           </li>
                           <li class="li liexpand">Added GPU accounting, on Volta only, to keep track of open compute contexts and GPU
                              				utilization. This data is updated when the driver is loaded, but can be retrieved in
                              				driver-loaded and driver-unloaded modes via Out of Band (OOB).
                           </li>
                           <li class="li liexpand">Added an out-of-band mechanism to fetch the instantaneous GPU and memory utilization See <a class="xref" href="https://apps.nvinfo.nvidia.com/pid/contentlibraries/detail?id=1001187" target="_blank" shape="rect"><u class="ph u"><a class="xref" href="https://apps.nvinfo.nvidia.com/pid/contentlibraries/detail?id=1001187" target="_blank" shape="rect">SMBPBI spec</a></u></a> for the documentation.
                           </li>
                           <li class="li liexpand">Added the ability to query GPU NVLink error rates / counts via out of band (OOB) with or without a driver present. See <a class="xref" href="https://apps.nvinfo.nvidia.com/pid/contentlibraries/detail?id=1001187" target="_blank" shape="rect"><u class="ph u"><a class="xref" href="https://apps.nvinfo.nvidia.com/pid/contentlibraries/detail?id=1001187" target="_blank" shape="rect">SMBPBI spec</a></u></a> for the documentation.
                           </li>
                           <li class="li liexpand">Added support for installing CUDA using runfiles on POWER (ppc64le) platforms.</li>
                           <li class="li liexpand">Added new CUDA samples for CUDA Graph APIs.</li>
                        </ul>
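                         <p class="p">The following sketch illustrates the <samp class="ph codeph">cudaLaunchCooperativeKernel</samp> API referenced in the
                            list above. It is an illustrative example only: the kernel name, grid and block sizes, and the grid-wide synchronization
                            pattern are arbitrary, and a cooperative launch additionally requires a device and build configuration that support
                            cooperative groups grid synchronization.
                         </p><pre xml:space="preserve" class="pre screen">
// Minimal sketch: launching a cooperative kernel with cudaLaunchCooperativeKernel.
// Assumes a GPU and build configuration that support grid-wide synchronization
// (for example, compute capability 6.0+ with relocatable device code enabled).
#include &lt;cooperative_groups.h&gt;
#include &lt;cstdio&gt;

namespace cg = cooperative_groups;

__global__ void gridSyncKernel(int *data)
{
    cg::grid_group grid = cg::this_grid();
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    data[idx] = idx;   // phase 1: every thread writes its own slot
    grid.sync();       // grid-wide barrier; requires a cooperative launch
    if (idx == 0)      // phase 2: values written by all blocks are now visible
        printf("last element = %d\n", data[gridDim.x * blockDim.x - 1]);
}

int main()
{
    const int blocks = 2, threads = 128;
    int *d_data = nullptr;
    cudaMalloc(&amp;d_data, blocks * threads * sizeof(int));

    void *args[] = { &amp;d_data };
    cudaLaunchCooperativeKernel((void *)gridSyncKernel, dim3(blocks), dim3(threads),
                                args, 0 /*sharedMemBytes*/, 0 /*stream*/);
    cudaDeviceSynchronize();
    cudaFree(d_data);
    return 0;
}
</pre>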
                     </div>
                  </div>
                  <div class="topic concept nested1" id="title-new-cuda-tools"><a name="title-new-cuda-tools" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#title-new-cuda-tools" name="title-new-cuda-tools" shape="rect">2.2.&nbsp;CUDA Tools</a></h3>
                     <div class="topic concept nested2" id="cuda-compiler-new-features"><a name="cuda-compiler-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-compiler-new-features" name="cuda-compiler-new-features" shape="rect">2.2.1.&nbsp;CUDA Compilers</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">The following compilers are supported as host compilers in <samp class="ph codeph">nvcc</samp>: 
                                 				
                                 <ul class="ul">
                                    <li class="li liexpand">GCC 8.x</li>
                                    <li class="li liexpand">Clang 7.0</li>
                                    <li class="li liexpand" dir="ltr">Microsoft Visual Studio 2017 (RTW, and all updates)</li>
                                    <li class="li liexpand" dir="ltr">Microsoft Visual Studio 2019 (Preview releases)</li>
                                    <li class="li liexpand" dir="ltr">PGI 19.x</li>
                                    <li class="li liexpand" dir="ltr">ICC 19.0</li>
                                    <li class="li liexpand">Xcode 10.1 (10B61)</li>
                                 </ul>
                              </li>
                              <li class="li liexpand">New functions <samp class="ph codeph">__isShared()</samp>, <samp class="ph codeph">__isConstant()</samp> and
                                 					<samp class="ph codeph">__isLocal()</samp> have been added, to check if a generic pointer points to
                                 				an object in<samp class="ph codeph"> __shared__</samp>, <samp class="ph codeph">__constant__ </samp>or local memory,
                                 				respectively. These functions are documented in the CUDA C Programming Guide, along with
                                 				the existing <samp class="ph codeph">__isGlobal()</samp> function.
                              </li>
                              <li class="li liexpand">The existing API functions <samp class="ph codeph">nvrtcGetLoweredName</samp> and
                                 					<samp class="ph codeph">nvrtcAddNameExpression</samp> have been enhanced to allow looking up the
                                 				mangled (lowered) name of <samp class="ph codeph">__constant__ </samp>and <samp class="ph codeph">__device__</samp>
                                 				variables. Details and example here: <a class="xref" href="https://docs.nvidia.com/cuda/nvrtc/index.html#accessing-lowered-names" target="_blank" shape="rect"><u class="ph u">https://docs.nvidia.com/cuda/nvrtc/index.html#accessing-lowered-names</u></a>.
                              </li>
                              <li class="li liexpand">
                                 <p class="p"><samp class="ph codeph">nvcc</samp> now supports the "-MF" and "-MM" flags related to dependency
                                    					generation. See below the description of the new flags from "nvcc --help":
                                 </p><a name="cuda-compiler-new-features__ul_upx_3xj_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cuda-compiler-new-features__ul_upx_3xj_sgb">
                                    <li class="li liexpand"><samp class="ph codeph">--generate-nonsystem-dependencies (-MM) </samp>: Same as
                                       							<samp class="ph codeph">--generate-dependencies</samp> but skip header files found in system
                                       						directories (Linux only).
                                    </li>
                                    <li class="li liexpand"><samp class="ph codeph">--dependency-output (-MF)</samp>: Specify the output file for the
                                       						dependency file generated with -M or -MM. If this option is not specified, the
                                       						output is the same as if -E has been specified. 
                                    </li>
                                 </ul>
                              </li>
                           </ul>
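                            <p class="p">The kernel below is a small illustrative sketch (not taken from the Programming Guide) of the address-space query
                               functions mentioned in the list above. The variable names are arbitrary; each query is expected to return a non-zero
                               value when the generic pointer refers to the corresponding memory space.
                            </p><pre xml:space="preserve" class="pre screen">
// Minimal sketch: classifying generic pointers with the address-space query functions.
#include &lt;cstdio&gt;

__constant__ int c_value = 42;        // lives in __constant__ memory

__global__ void classifyPointers()
{
    __shared__ int s_value;           // lives in __shared__ memory
    int l_value = 0;                  // addressable local variable (local memory)

    printf("shared?   %u\n", __isShared(&amp;s_value));    // expected: non-zero
    printf("constant? %u\n", __isConstant(&amp;c_value));  // expected: non-zero
    printf("local?    %u\n", __isLocal(&amp;l_value));     // expected: non-zero
    printf("global?   %u\n", __isGlobal(&amp;s_value));    // expected: zero
}

int main()
{
    classifyPointers&lt;&lt;&lt;1, 1&gt;&gt;&gt;();
    cudaDeviceSynchronize();
    return 0;
}
</pre>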
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-profiler-new-features"><a name="cuda-profiler-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-profiler-new-features" name="cuda-profiler-new-features" shape="rect">2.2.2.&nbsp;CUDA Profiler</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">For new features in Visual Profiler and <samp class="ph codeph">nvprof</samp>, see the <a class="xref" href="https://docs.nvidia.com/cuda/profiler-users-guide/index.html#whats-new" target="_blank" shape="rect"><u class="ph u">What's New</u></a> section in the Profiler
                                 				User’s Guide.
                              </li>
                              <li class="li liexpand">For new features available in CUPTI, see the <a class="xref" href="https://docs.nvidia.com/cupti/Cupti/r_overview.html#r_whats_new" target="_blank" shape="rect"><u class="ph u"><a class="xref" href="https://docs.nvidia.com/cupti/Cupti/r_overview.html#r_whats_new" target="_blank" shape="rect">What's New</a></u></a> section in the
                                 				CUPTI documentation.
                              </li>
                              <li class="li liexpand">For system wide profiling, use Nsight Systems. Refer to the Nsight Systems <a class="xref" href="https://docs.nvidia.com/nsight-systems/" target="_blank" shape="rect"><u class="ph u">Release Notes</u></a>.
                              </li>
                              <li class="li liexpand">For profiling specific CUDA kernels, use Nsight Compute. Refer to the Nsight Compute
                                 					<a class="xref" href="https://docs.nvidia.com/nsight-compute/ReleaseNotes/index.html" target="_blank" shape="rect"><u class="ph u">Release Notes</u></a>.
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-memcheck-new-features"><a name="cuda-memcheck-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-memcheck-new-features" name="cuda-memcheck-new-features" shape="rect">2.2.3.&nbsp;CUDA-MEMCHECK</a></h3>
                        <div class="body conbody">
                           <div class="p"><a name="cuda-memcheck-new-features__ul_q4v_pyj_sgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="cuda-memcheck-new-features__ul_q4v_pyj_sgb">
                                 <li class="li">For new features in CUDA-MEMCHECK, see the <a class="xref" href="http://docs.nvidia.com/cuda/cuda-memcheck/index.html#release-notes" target="_blank" shape="rect">Release Notes</a> in the CUDA-MEMCHECK
                                    					documentation.
                                 </li>
                              </ul>
                           </div>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="title-new-cuda-libraries"><a name="title-new-cuda-libraries" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#title-new-cuda-libraries" name="title-new-cuda-libraries" shape="rect">2.3.&nbsp;CUDA Libraries</a></h3>
                     <div class="body conbody">
                        <div class="abstract"></div>
                        <p dir="ltr" class="p" id="title-new-cuda-libraries__docs-internal-guid-a9015722-7fff-a276-36ca-fd9580dbeda9"><a name="title-new-cuda-libraries__docs-internal-guid-a9015722-7fff-a276-36ca-fd9580dbeda9" shape="rect">
                              <!-- --></a>This release of the CUDA
                           			toolkit is packaged with libraries that deliver new and extended functionality, bug fixes,
                           			and performance improvements for single and multi-GPU environments. 
                        </p>
                        <p dir="ltr" class="p">Also in this release the <samp class="ph codeph">soname</samp> of the libraries has been modified
                           			to not include the minor toolkit version number. For example, the cuFFT library
                           				<samp class="ph codeph">soname</samp> has changed from <samp class="ph codeph">libcufft.so.10.1</samp> to
                           				<samp class="ph codeph">libcufft.so.10</samp>. This is done to facilitate any future library updates
                           			that do not include API breaking changes without the need to relink.
                        </p>
                     </div>
                     <div class="topic concept nested2" id="cublas-new-features"><a name="cublas-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cublas-new-features" name="cublas-new-features" shape="rect">2.3.1.&nbsp;cuBLAS Library</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">With this release, on Linux systems, the cuBLAS libraries listed below are now installed in
                                 				the <samp class="ph codeph">/usr/lib/&lt;arch&gt;-linux-gnu/</samp> or <samp class="ph codeph">/usr/lib64/</samp>
                                 				directories as shared and static libraries. Their interfaces are available in the
                                 					<samp class="ph codeph">/usr/include</samp> directory: <a name="cublas-new-features__ul_ism_2zj_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cublas-new-features__ul_ism_2zj_sgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">cublas (BLAS)</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">cublasLt (new Matrix Multiply library)</p>
                                    </li>
                                 </ul>
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cublas-new-features__docs-internal-guid-fc1e5612-7fff-7cc2-de35-d998dcc14647"><a name="cublas-new-features__docs-internal-guid-fc1e5612-7fff-7cc2-de35-d998dcc14647" shape="rect">
                                        <!-- --></a>Note that the
                                     					new installation locations of the cuBLAS libraries differ from those of past versions.
                                     					In past versions, the libraries were installed in directories under the main
                                     					toolkit installation directory. 
                                 </p>
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p">Package managers on Linux OSs will remove the previous version of cuBLAS
                                    					and update to the new libraries in the new location. For linking and execution make
                                    					sure the new location is specified within your paths such as LD_LIBRARY_PATH.
                                 </p>
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cublas-new-features__docs-internal-guid-18ba3ff3-7fff-d139-7b14-58b1cf3855c6"><a name="cublas-new-features__docs-internal-guid-18ba3ff3-7fff-d139-7b14-58b1cf3855c6" shape="rect">
                                       <!-- --></a>With this update, the
                                    					versioning scheme of the cuBLAS library has changed to a 4-digit version. Because of
                                    					this change, version numbers might differ between the CUDA toolkit and cuBLAS
                                    					libraries in future releases. For the new 4-digit version:
                                 </p><a name="cublas-new-features__ul_njf_kzj_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cublas-new-features__ul_njf_kzj_sgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">The first three digits follow semantic versioning, and </p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">The last digit is the build number. </p>
                                    </li>
                                 </ul>
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cublas-new-features__docs-internal-guid-132f4565-7fff-2e4d-4b1f-289d0e7c8dc9"><a name="cublas-new-features__docs-internal-guid-132f4565-7fff-2e4d-4b1f-289d0e7c8dc9" shape="rect">
                                        <!-- --></a>A new library,
                                     					cuBLASLt, has been added. cuBLASLt is a lightweight library dedicated to
                                     					GEneral Matrix-to-matrix Multiply (GEMM) operations with a new flexible API. This new
                                     					library adds flexibility in matrix data layouts, input types, compute types, and also
                                     					in choosing the algorithmic implementations and heuristics through parameter
                                     					programmability. Read more at: <a class="xref" href="http://docs.nvidia.com/cuda/cublas/index.html#using-the-cublasLt-api" target="_blank" shape="rect"><u class="ph u">
                                           							http://docs.nvidia.com/cuda/cublas/index.html#using-the-cublasLt-api</u></a>.
                                 </p>
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cublas-new-features__docs-internal-guid-8985de4b-7fff-a0d6-0e30-6d1d0a72a07a"><a name="cublas-new-features__docs-internal-guid-8985de4b-7fff-a0d6-0e30-6d1d0a72a07a" shape="rect">
                                        <!-- --></a>The new
                                     					cuBLASLt library is packaged as a separate binary and a header file. In addition,
                                     					cuBLASLt adds support for:
                                 </p><a name="cublas-new-features__ul_oks_5zj_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cublas-new-features__ul_oks_5zj_sgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Utilization of IMMA tensor core operations on Turing GPUs for int8
                                          							input matrices.
                                       </p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">FP16 half-precision CGEMM split-complex matrix multiplies using
                                          							tensor cores on Volta and Turing GPUs.
                                       </p>
                                    </li>
                                 </ul>
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cusolver-new-features"><a name="cusolver-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cusolver-new-features" name="cusolver-new-features" shape="rect">2.3.2.&nbsp;cuSOLVER Library</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">For symmetric dense eigensolver:<a name="cusolver-new-features__ul_aj2_r1k_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cusolver-new-features__ul_aj2_r1k_sgb">
                                    <li dir="ltr" class="li">
                                       <p dir="ltr" class="p">Added a new selective eigensolver functionality for standard and
                                          							generalized eigenvalue problems: SYEVDX and SYGVDX
                                       </p>
                                    </li>
                                    <li dir="ltr" class="li">
                                       <p dir="ltr" class="p">Improved the performance for full eigenspectrum eigensolver.</p>
                                    </li>
                                 </ul>
                              </li>
                              <li class="li liexpand">Added a new batched GESVDA API that computes the approximate singular value
                                 				decomposition of a tall skinny <samp class="ph codeph">m×n</samp> matrix A.
                              </li>
                              <li class="li liexpand">Added a new POTRI API that computes the inverse of a symmetric positive definite
                                 				matrix, using the Cholesky factorization computed by DPOTRF.
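<p class="p">A hedged sketch of the new functionality is shown below; the <samp class="ph codeph">cusolverDnDpotri</samp>/<samp class="ph codeph">cusolverDnDpotri_bufferSize</samp> entry points follow the usual cuSOLVER dense naming convention, so verify the exact signatures against the cuSOLVER documentation. Allocation of <samp class="ph codeph">d_A</samp>, data transfer, and error checking are omitted.</p><pre xml:space="preserve">#include &lt;cusolverDn.h&gt;
#include &lt;cuda_runtime.h&gt;

// Invert an n x n symmetric positive definite matrix in place:
// DPOTRF (Cholesky factorization) followed by DPOTRI (inverse from the factor).
void spd_inverse(cusolverDnHandle_t h, int n, double *d_A, int lda)
{
    int lwork_potrf = 0, lwork_potri = 0;
    cusolverDnDpotrf_bufferSize(h, CUBLAS_FILL_MODE_LOWER, n, d_A, lda, &amp;lwork_potrf);
    cusolverDnDpotri_bufferSize(h, CUBLAS_FILL_MODE_LOWER, n, d_A, lda, &amp;lwork_potri);

    int lwork = lwork_potrf &gt; lwork_potri ? lwork_potrf : lwork_potri;
    double *d_work = NULL;
    int *d_info = NULL;
    cudaMalloc(&amp;d_work, sizeof(double) * lwork);
    cudaMalloc(&amp;d_info, sizeof(int));

    cusolverDnDpotrf(h, CUBLAS_FILL_MODE_LOWER, n, d_A, lda, d_work, lwork, d_info);
    cusolverDnDpotri(h, CUBLAS_FILL_MODE_LOWER, n, d_A, lda, d_work, lwork, d_info);

    cudaFree(d_info);
    cudaFree(d_work);
}</pre>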
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cusparse-new-features"><a name="cusparse-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cusparse-new-features" name="cusparse-new-features" shape="rect">2.3.3.&nbsp;cuSPARSE Library</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li">Added a new generic Sparse x Dense Matrix Multiply (SpMM) APIs that encapsulates the
                                 				functionality of many legacy APIs.
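<p class="p">The following is a minimal, illustrative sketch only; in particular, the algorithm enumerator <samp class="ph codeph">CUSPARSE_MM_ALG_DEFAULT</samp> is assumed from the CUDA 10.1 headers, so verify the names and signatures against the cuSPARSE documentation. It multiplies a sparse COO matrix by a dense matrix; allocation of the device arrays and error checking are omitted.</p><pre xml:space="preserve">#include &lt;cstdint&gt;
#include &lt;cusparse.h&gt;
#include &lt;cuda_runtime.h&gt;

// Sketch: C = alpha * A * B + beta * C, where A is an m x k sparse matrix in COO
// format (d_rows, d_cols, d_vals hold its nnz entries) and B (k x n), C (m x n)
// are dense, column-major device buffers.
void coo_spmm(cusparseHandle_t handle, int64_t m, int64_t n, int64_t k, int64_t nnz,
              int *d_rows, int *d_cols, float *d_vals, float *d_B, float *d_C)
{
    cusparseSpMatDescr_t matA;
    cusparseCreateCoo(&amp;matA, m, k, nnz, d_rows, d_cols, d_vals,
                      CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);

    cusparseDnMatDescr_t matB, matC;
    cusparseCreateDnMat(&amp;matB, k, n, k, d_B, CUDA_R_32F, CUSPARSE_ORDER_COL);
    cusparseCreateDnMat(&amp;matC, m, n, m, d_C, CUDA_R_32F, CUSPARSE_ORDER_COL);

    float alpha = 1.0f, beta = 0.0f;
    size_t bufferSize = 0;
    void *d_buffer = NULL;
    cusparseSpMM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            &amp;alpha, matA, matB, &amp;beta, matC,
                            CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT, &amp;bufferSize);
    cudaMalloc(&amp;d_buffer, bufferSize);

    cusparseSpMM(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                 CUSPARSE_OPERATION_NON_TRANSPOSE,
                 &amp;alpha, matA, matB, &amp;beta, matC,
                 CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT, d_buffer);

    cudaFree(d_buffer);
    cusparseDestroyDnMat(matC);
    cusparseDestroyDnMat(matB);
    cusparseDestroySpMat(matA);
}</pre>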
                              </li>
                              <li class="li">
                                 <p dir="ltr" class="p" id="cusparse-new-features__docs-internal-guid-4ae8e68a-7fff-d641-8a28-f4d9aa758dcc"><a name="cusparse-new-features__docs-internal-guid-4ae8e68a-7fff-d641-8a28-f4d9aa758dcc" shape="rect">
                                       <!-- --></a>Added a new
                                    					COO matrix-matrix multiplication (cooMM) implementation with:
                                 </p><a name="cusparse-new-features__ul_mth_z1k_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cusparse-new-features__ul_mth_z1k_sgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Deterministic and non-deterministic variants</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Batched SpMM</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Support for multiple data type combinations</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Speed-ups w.r.t. csrMM for matrices with highly irregular
                                          							nnzs/row
                                       </p>
                                    </li>
                                 </ul>
                              </li>
                              <li class="li">Added two new algorithms for <samp class="ph codeph">csr2csc</samp> format conversions with improved
                                 				performance and reduced memory use.
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cufft-new-features"><a name="cufft-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cufft-new-features" name="cufft-new-features" shape="rect">2.3.4.&nbsp;cuFFT Library</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li">Improved the performance and scalability for the following use cases:
                                 				<a name="cufft-new-features__ul_fbl_fbk_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cufft-new-features__ul_fbl_fbk_sgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">multi-GPU non-power of 2 transforms</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">R2C and Z2D odd sized transforms</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">2D transforms with small sizes and large batch counts</p>
                                    </li>
                                 </ul>
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="curand-new-features"><a name="curand-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#curand-new-features" name="curand-new-features" shape="rect">2.3.5.&nbsp;cuRAND Library</a></h3>
                        <div class="body conbody"><a name="curand-new-features__ul_dm4_vbw_cn" shape="rect">
                              <!-- --></a><ul class="ul" id="curand-new-features__ul_dm4_vbw_cn">
                              <li class="li liexpand">Improved the performance of the following random number generators:
                                 				<a name="curand-new-features__ul_ap5_lbk_sgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="curand-new-features__ul_ap5_lbk_sgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">MTGP32</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">MRG32k3a</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Sobol32 and Scrambled Sobol32</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Sobol64 and Scrambled Sobol64</p>
                                    </li>
                                 </ul>
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="npp-new-features"><a name="npp-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#npp-new-features" name="npp-new-features" shape="rect">2.3.6.&nbsp;NPP Library</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">Some of the most commonly used image processing functions were extended to support
                                 				the FP16 (<samp class="ph codeph">__half</samp>) data type on GPU architectures Volta and beyond.
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="npp-new-features__docs-internal-guid-f00c3201-7fff-45c4-1ec3-2d5fcf30ead2"><a name="npp-new-features__docs-internal-guid-f00c3201-7fff-45c4-1ec3-2d5fcf30ead2" shape="rect">
<!-- --></a>Added support
                                    					for application-managed stream contexts. Application-managed stream contexts make NPP
                                    					truly stateless internally, allowing for rapid stream-context switching with no
                                    					overhead. 
                                 </p>
                              </li>
                              <li class="li liexpand">While it is recommended that all new NPP application code use application-managed
                                 				stream contexts, existing application code can continue to use
                                 					<samp class="ph codeph">nppSetStream()</samp> and <samp class="ph codeph">nppGetStream()</samp> to manage stream
contexts (also now with no overhead). Over time, however, NPP will likely deprecate the older,
                                 				non-application-managed stream context API.
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="nvjpeg-new-features"><a name="nvjpeg-new-features" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvjpeg-new-features" name="nvjpeg-new-features" shape="rect">2.3.7.&nbsp;nvJPEG Library</a></h3>
                        <div class="body conbody"><a name="nvjpeg-new-features__ul_dm4_vbw_cn" shape="rect">
                              <!-- --></a><ul class="ul" id="nvjpeg-new-features__ul_dm4_vbw_cn">
                              <li class="li liexpand">Added baseline encoding functionality to the library that will be extended in future
                                 				releases.
                              </li>
                              <li class="li liexpand">Added new batched decoding that uses GPU acceleration for all phases of computation.
                                 				This delivers significant performance gains for large batches of images where most
                                 				images are baseline encoded JPEG images.
                              </li>
                              <li class="li liexpand">Added new APIs for pinned memory allocator and for memory overallocations.</li>
                              <li class="li liexpand">The nvJPEG library is now added to the Linux ppc64le CUDA Toolkit distributions.</li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="deprecated-features"><a name="deprecated-features" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#deprecated-features" name="deprecated-features" shape="rect">2.4.&nbsp;Deprecated Features
                           </a></h3>
                     <div class="body conbody">
                        <div class="p">The following features are deprecated in the current release of the CUDA software. The
                           features still work in the current release, but their documentation may have been
                           removed, and they will become officially unsupported in a future release. We recommend
                           that developers employ alternative solutions to these features in their software. 
                           <dl class="dl">
                              <dt class="dt dlterm">General CUDA</dt>
                              <dd class="dd">
                                 <ul class="ul">
                                    <li class="li liexpand">Nsight Eclipse Edition standalone is deprecated in CUDA 10.1, and will be
                                       								dropped in the release that immediately follows CUDA 10.1. 
                                    </li>
                                    <li class="li liexpand">Support for RHEL 6.x is deprecated with CUDA 10.1. It may be dropped
                                       								in a future release of CUDA. Customers are encouraged to adopt RHEL 7.x to
                                       								use new versions of CUDA.
                                    </li>
                                    <li class="li liexpand">
                                       <p dir="ltr" class="p" id="deprecated-features__docs-internal-guid-8cc53236-7fff-dbc1-b8a5-be729a1a938f"><a name="deprecated-features__docs-internal-guid-8cc53236-7fff-dbc1-b8a5-be729a1a938f" shape="rect">
                                             <!-- --></a>The following compilers are no longer supported as host compilers for
                                          										<samp class="ph codeph">nvcc</samp></p><a name="deprecated-features__ul_qfz_xhn_tgb" shape="rect">
                                          <!-- --></a><ul class="ul" id="deprecated-features__ul_qfz_xhn_tgb">
                                          <li class="li liexpand" dir="ltr">PGI 17.x</li>
                                          <li class="li liexpand" dir="ltr">Microsoft Visual Studio 2010</li>
                                          <li class="li liexpand" dir="ltr">Clang versions lower than 3.7</li>
                                       </ul>
                                    </li>
                                    <li class="li liexpand">Microsoft Visual Studio versions 2011, 2012 and 2013 are now deprecated as host compilers
                                       								for nvcc. Support for these compilers may be removed in a future release of
                                       								CUDA.
                                    </li>
                                    <li class="li liexpand">32-bit tools are no longer supported starting with CUDA 10.0.</li>
                                    <li class="li liexpand">NVIDIA GPU Library Advisor (<samp class="ph codeph">gpu-library-advisor</samp>) is now
                                       								deprecated and will be removed in a future release of the toolkit.
                                    </li>
                                    <li class="li liexpand">The non-sync definitions of warp shuffle functions ( <samp class="ph codeph">__shfl</samp>,
                                       									<samp class="ph codeph">__shfl_up</samp>, <samp class="ph codeph">__shfl_down</samp>, and
                                       									<samp class="ph codeph">__shfl_xor</samp> ) and warp vote functions
                                       									(<samp class="ph codeph">__any</samp>, <samp class="ph codeph">__all</samp>,
                                       									<samp class="ph codeph">__ballot</samp>) have been removed when compilation is
                                       								targeting devices with compute capability 7.x and higher.
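<p class="p">For example, when targeting compute capability 7.x and higher, code should use the <samp class="ph codeph">*_sync</samp> variants with an explicit participation mask. A minimal sketch, assuming a launch with one full warp per block:</p><pre xml:space="preserve">__global__ void warp_sum_and_count(const int *in, int *out)
{
    unsigned mask = 0xffffffffu;             // all 32 lanes participate
    int v = in[threadIdx.x];

    // __ballot_sync replaces the removed __ballot.
    unsigned positive = __ballot_sync(mask, v &gt; 0);

    // __shfl_down_sync replaces the removed __shfl_down: warp-level reduction.
    for (int offset = 16; offset &gt; 0; offset &gt;&gt;= 1)
        v += __shfl_down_sync(mask, v, offset);

    if (threadIdx.x == 0) {
        out[0] = v;                          // sum of the warp's inputs
        out[1] = __popc(positive);           // how many inputs were positive
    }
}</pre>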
                                       								
                                    </li>
                                    <li class="li liexpand">For WMMA operations with floating point accumulators, the
                                       									<samp class="ph codeph">satf</samp> (saturate-to-finite value) mode parameter is
                                       								deprecated. Using it can lead to unexpected results. See <a class="xref" href="http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-description" target="_blank" shape="rect">http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-description</a>
                                       								for details.
                                    </li>
                                 </ul>
                              </dd>
                              <dt class="dt dlterm">CUDA Libraries</dt>
                              <dd class="dd">
                                 <ul class="ul">
                                    <li class="li liexpand">The nvGRAPH library is deprecated. The library will no longer be
                                       								shipped in future releases of the CUDA toolkit.
                                    </li>
                                    <li class="li liexpand">The <samp class="ph codeph">nppGetGpuComputeCapability</samp> function will be deprecated
in the next NPP release. Users should instead call
                                       								<samp class="ph codeph">cudaGetDevice()</samp> to get the GPU device ID, then call
                                       								<samp class="ph codeph">cudaDeviceGetAttribute()</samp> twice, once with the
                                       									<samp class="ph codeph">cudaDevAttrComputeCapabilityMajor</samp> parameter, and once
                                       								with the <samp class="ph codeph">cudaDevAttrComputeCapabilityMinor</samp> parameter.
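<p class="p">A minimal sketch of the recommended replacement, using only CUDA runtime API calls:</p><pre xml:space="preserve">#include &lt;cuda_runtime.h&gt;

// Returns the compute capability of the current device as major*10 + minor.
int currentComputeCapability()
{
    int device = 0, ccMajor = 0, ccMinor = 0;
    cudaGetDevice(&amp;device);
    cudaDeviceGetAttribute(&amp;ccMajor, cudaDevAttrComputeCapabilityMajor, device);
    cudaDeviceGetAttribute(&amp;ccMinor, cudaDevAttrComputeCapabilityMinor, device);
    return ccMajor * 10 + ccMinor;   // e.g. 75 on a Turing GPU
}</pre>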
                                    </li>
                                 </ul>
                              </dd>
                           </dl>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="title-resolved-issues"><a name="title-resolved-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#title-resolved-issues" name="title-resolved-issues" shape="rect">2.5.&nbsp;Resolved Issues
                           </a></h3>
                     <div class="topic concept nested2" id="cuda-compiler-resolved-issues"><a name="cuda-compiler-resolved-issues" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-compiler-resolved-issues" name="cuda-compiler-resolved-issues" shape="rect">2.5.1.&nbsp;CUDA Compilers</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">In CUDA 9.2 <samp class="ph codeph">nvprune</samp> crashes when running on a library with bss
                                 				sections--for example, while pruning <samp class="ph codeph">libcusparse_static.a</samp>. This is
                                 				fixed in CUDA 10.1.
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cuda-compiler-resolved-issues__docs-internal-guid-d2652bbd-7fff-76ed-bab9-8874d1adc389"><a name="cuda-compiler-resolved-issues__docs-internal-guid-d2652bbd-7fff-76ed-bab9-8874d1adc389" shape="rect">
                                       <!-- --></a>Within a
                                    					template function, the CUDA compiler previously incorrectly allowed the use of an
                                    					undeclared function. In CUDA 10.1, this compiler bug has been fixed and may cause
                                    					diagnostics to be emitted for code that was previously incorrectly accepted. For
                                    					example: 
</p><pre xml:space="preserve">template &lt;typename T&gt;
__global__ void foo(T in) { bar(in); }   // error in CUDA 10.1: bar() is not yet declared

int main() { foo&lt;&lt;&lt;1,1&gt;&gt;&gt;(1); }

__device__ void bar(int) { }</pre><p dir="ltr" class="p">This example was accepted by the previous CUDA compiler, but will generate
                                    					a compile error with the CUDA 10.1 compiler, because the function <samp class="ph codeph">bar()</samp>
                                    					is used in <samp class="ph codeph">foo()</samp> before it has been declared.
                                 </p>
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-libraries-resolved-issues"><a name="cuda-libraries-resolved-issues" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-libraries-resolved-issues" name="cuda-libraries-resolved-issues" shape="rect">2.5.2.&nbsp;CUDA Libraries</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">In earlier releases, cuBLAS GEMM calls might randomly crash when running multiple
                                 				host threads sharing one cuBLAS handle despite adhering to recommended usage in <a class="xref" href="https://docs.nvidia.com/cuda/cublas/index.html#thread-safety2" target="_blank" shape="rect"> Thread Safety. </a> This bug affects cuBLAS in earlier CUDA
                                 				releases, and is fixed in CUDA 10.1. 
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cuda-libraries-resolved-issues__docs-internal-guid-cfe1e5ab-7fff-0882-648f-ba0e6debcc4d"><a name="cuda-libraries-resolved-issues__docs-internal-guid-cfe1e5ab-7fff-0882-648f-ba0e6debcc4d" shape="rect">
<!-- --></a>An internal
                                    					routine (LARFT) in cuSOLVER dense linear algebra has a race condition when the user
                                    					stream is not null. Although this routine is not in the cuSOLVER public API, it
                                    					affects all routines based on Householder reflections, including ORGBR, ORMTR,
                                    					ORGTR, ORMQR, ORMQL, ORGQR, SYEVD, and SYGVD. This bug affects cuSOLVER dense linear
                                    					algebra functionality in CUDA 10.0 and is fixed in CUDA 10.1. 
                                 </p>
                                 <p dir="ltr" class="p">The following routines are <strong class="ph b">not </strong>affected:
                                 </p><a name="cuda-libraries-resolved-issues__ul_pmn_1rt_tgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="cuda-libraries-resolved-issues__ul_pmn_1rt_tgb">
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">GEQRF and GESVD.</p>
                                    </li>
                                    <li class="li liexpand" dir="ltr">
                                       <p dir="ltr" class="p">Also, GESVDJ and SYEVDJ are not affected because they are based-on
                                          							the Jacobi method.
                                       </p>
                                    </li>
                                 </ul>
                              </li>
                              <li class="li liexpand">The cuSOLVER routine SYEVD in CUDA 10.0 has a bug that may potentially impact single
                                 				and single complex eigenvalue problems. In affected cases, the eigensolver may deliver
                                 				inaccurate eigenvalues and eigenvectors. This bug only affects cuSOLVER dense linear
                                 				algebra functionality in CUDA 10.0 and is fixed in CUDA 10.1.
                              </li>
                              <li class="li liexpand">In CUDA 10.0 cuBLAS library, a bug in the batched LU factorization,
                                 					<samp class="ph codeph">cublas[S|D|C|Z]getrfBatched</samp>, may lead to wrong result or
                                 				inconsistent result from run to run. CUDA 10.1 fixes the issue.
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="title-known-issues"><a name="title-known-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#title-known-issues" name="title-known-issues" shape="rect">2.6.&nbsp;Known Issues</a></h3>
                     <div class="topic concept nested2" id="cuda-general-known-issues"><a name="cuda-general-known-issues" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-general-known-issues" name="cuda-general-known-issues" shape="rect">2.6.1.&nbsp;General CUDA</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li">On systems with a new install of Ubuntu 18.04.2, note that the installation of CUDA
                                 				10.1 and NVIDIA 418 drivers may result in the following error:
                                 <p class="p"><samp class="ph codeph">The following
                                       						packages have unmet dependencies:
                                       						</samp></p>
                                 <p class="p"><samp class="ph codeph">xserver-xorg-video-nvidia-418 : Depends:
                                       						</samp></p>
                                 <p class="p"><samp class="ph codeph">xserver-xorg-core (&gt;=
                                       						2:1.19.6-1ubuntu2~)</samp></p>
                                 <p class="p"><samp class="ph codeph">E: Unable to correct problems, you have
                                       						held broken packages.</samp></p>
                                 <div class="p">To recover from this error, install the
                                    						<em class="ph i">xserver-xorg-core</em> package and proceed with the installation of CUDA.
                                    						<pre class="pre screen" xml:space="preserve">$ sudo apt-get install xserver-xorg-core</pre><div class="note note"><span class="notetitle">Note:</span> This error is only
                                       						observed on systems with a new install of Ubuntu 18.04.2.  The error is not
                                       						observed on systems that are upgraded from 18.04.1 to 18.04.2. 
                                    </div>
                                 </div>
                              </li>
                              <li class="li">Xwayland is not compatible with <samp class="ph codeph">nvidia-settings</samp> and graphical CUDA
                                 				samples. Recommend switching to Xorg session.
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-tools-known-issues"><a name="cuda-tools-known-issues" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-tools-known-issues" name="cuda-tools-known-issues" shape="rect">2.6.2.&nbsp;CUDA Tools</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">When using separate compilation and object linking for the device code (<samp class="ph codeph">nvcc
                                    					--relocatable-device-code=true or nvcc --device-c</samp>) the resulting binary may
                                 				crash at runtime when <strong class="ph b">all</strong> conditions below are true:<a name="cuda-tools-known-issues__ul_s13_4nx_xgb" shape="rect">
                                    <!-- --></a><ol class="ol" id="cuda-tools-known-issues__ul_s13_4nx_xgb">
                                    <li class="li liexpand">Some of the objects linked into the binary are generated by a previously released
                                       						compiler (i.e., compiler from CUDA 10.0 Toolkit or earlier), and
                                    </li>
                                    <li class="li liexpand">Objects generated by the previously released compiler contain CUDA kernel
                                       						function definition (i.e., “<samp class="ph codeph">__global__</samp>” functions). 
                                    </li>
                                 </ol>
                                 <p class="p">A possible workaround in such case is to generate all the objects with the CUDA
                                    					10.1 compiler.
                                 </p>
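<p class="p">For example, assuming two placeholder source files <samp class="ph codeph">a.cu</samp> and <samp class="ph codeph">b.cu</samp>, rebuilding everything with the 10.1 toolchain looks like:</p><pre class="pre screen" xml:space="preserve">$ nvcc --device-c a.cu b.cu     # recompile every object with the CUDA 10.1 nvcc
$ nvcc a.o b.o -o app           # device-link and build the final binary</pre>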
                              </li>
                              <li class="li liexpand">For known issues in cuda-memcheck, see the <a class="xref" href="http://docs.nvidia.com/cuda/cuda-memcheck/index.html#known-issues" target="_blank" shape="rect"><u class="ph u">Known Issues</u></a> section in the
                                 				cuda-memcheck documentation.
                              </li>
                              <li class="li liexpand">For known issues in Nsight Compute, see the <a class="xref" href="https://docs.nvidia.com/nsight-compute/ReleaseNotes/index.html#known-issues" target="_blank" shape="rect"><u class="ph u">Known Issues</u></a> section.
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cuda-tools-known-issues__docs-internal-guid-e224861a-7fff-b846-609d-3072ee0d6d78"><a name="cuda-tools-known-issues__docs-internal-guid-e224861a-7fff-b846-609d-3072ee0d6d78" shape="rect">
                                       <!-- --></a>When enabling
                                    					the "Auto Profile" option in the Nsight Compute UI, profiling across different GPU
                                    					architectures may fail.
                                 </p>
                                 <p dir="ltr" class="p">To workaround this issue, profile the relevant kernels using the Nsight
                                    					Compute CLI, or disable "Auto Profile" in the UI and manually profile these
                                    					kernels.
                                 </p>
                              </li>
                              <li class="li liexpand">The tools <samp class="ph codeph">nv-nsight-cu</samp> and <samp class="ph codeph">nv-nsight-cu-cli</samp> will not
                                 				work on any Linux platform with GLIBC version lower than 2.15. Hence these tools will
                                 				not work on RHEL 6.10 and CentOS 6.10, which use GLIBC 2.12.
                              </li>
                              <li class="li liexpand">For known issues in CUDA Profiling tools <samp class="ph codeph">nvprof</samp> and Visual Profiler, see the
                                 					<a class="xref" href="https://docs.nvidia.com/cuda/profiler-users-guide/index.html#profiler-known-issues" target="_blank" shape="rect"><u class="ph u">Profiler Known Issues</u></a> section in the
                                 				Nsight Eclipse Edition Getting Started Guide.
                              </li>
                              <li class="li liexpand">For known issues in the CUPTI library, see the <a class="xref" href="https://docs.nvidia.com/cupti/Cupti/r_limitations.html#r_limitations" target="_blank" shape="rect"><u class="ph u">Limitations</u></a> section in the CUPTI
                                 				document.
                              </li>
                              <li class="li liexpand">For known issues in Nsight Eclipse, see the <a class="xref" href="https://docs.nvidia.com/cuda/nsight-eclipse-edition-getting-started-guide/index.html#unique_370340335" target="_blank" shape="rect"><u class="ph u">Nsight Eclipse Known Issues</u></a> section in
                                 				the Profiler User’s Guide.
                              </li>
                              <li class="li liexpand">
                                 <p dir="ltr" class="p" id="cuda-tools-known-issues__docs-internal-guid-2e3948a5-7fff-bed0-207f-9cb89259d18f"><a name="cuda-tools-known-issues__docs-internal-guid-2e3948a5-7fff-bed0-207f-9cb89259d18f" shape="rect">
<!-- --></a>On Windows,
                                    					CUPTI samples and other applications using the CUPTI APIs will fail with the error
                                    					"cupti.dll was not found". This is due to a mismatch in the CUPTI dynamic library
                                    					name referenced in the import library “cupti.lib”. 
                                 </p>
To work around this issue,
                                 				rename the CUPTI dynamic library under the CUDA Toolkit directory (default location:
                                 				“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\extras\CUPTI\lib64”)
                                 					<strong class="ph b">from</strong> “cupti64_101.dll” <strong class="ph b">to</strong> “cupti.dll”.
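<div class="p">For example, from a command prompt (copying rather than renaming keeps the original file in place):<pre class="pre screen" xml:space="preserve">C:\&gt; cd "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\extras\CUPTI\lib64"
C:\&gt; copy cupti64_101.dll cupti.dll</pre></div>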
                              </li>
                              <li class="li liexpand">A call to <samp class="ph codeph">cuptiFinalize()</samp>/<samp class="ph codeph">cuptiProfilerDeInitialize()</samp>
                                 				API can result in a hang with 410.x driver. <strong class="ph b">Please use a 418 or later
                                    				driver.</strong></li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-libraries-knonwn-issues"><a name="cuda-libraries-knonwn-issues" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-libraries-knonwn-issues" name="cuda-libraries-knonwn-issues" shape="rect">CUDA Libraries</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li liexpand">cuSOLVER dense linear algebra routine GETRF might exit with error code 702 on GPUs
                                 				that have only 2 SMs Jetson TX1 GPUs. 
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="thrust-release-notes"><a name="thrust-release-notes" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#thrust-release-notes" name="thrust-release-notes" shape="rect">3.&nbsp;Thrust v1.9.4 Release Notes</a></h2>
                  <div class="body conbody">
                     <p dir="ltr" class="p" id="thrust-release-notes__docs-internal-guid-ec7800e8-7fff-bd56-2d69-2518c757d5cd"><a name="thrust-release-notes__docs-internal-guid-ec7800e8-7fff-bd56-2d69-2518c757d5cd" shape="rect">
                           <!-- --></a>Thrust v1.9.4 adds
                        			asynchronous interfaces for parallel algorithms, a new allocator system including caching
                        			allocators and unified memory support, as well as a variety of other enhancements, mostly
                        			related to C++11/C++14/C++17/C++20 support. 
                     </p>
                     <p dir="ltr" class="p">The new asynchronous algorithms in the <samp class="ph codeph">thrust::async</samp> namespace
                        			return <samp class="ph codeph">thrust::event</samp> or <samp class="ph codeph">thrust::future</samp> objects, which can
                        			be waited upon to synchronize with the completion of the parallel operation.
                     </p>
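<p class="p">A minimal sketch of the new interface (assuming the C++11 CUDA backend and the <samp class="ph codeph">&lt;thrust/async/reduce.h&gt;</samp> header; names follow the Thrust v1.9.4 changelog):</p><pre xml:space="preserve">#include &lt;thrust/async/reduce.h&gt;
#include &lt;thrust/device_vector.h&gt;
#include &lt;thrust/execution_policy.h&gt;
#include &lt;cstdio&gt;

int main()
{
    thrust::device_vector&lt;int&gt; d(1 &lt;&lt; 20, 1);

    // Launches the reduction without blocking the calling host thread.
    auto fut = thrust::async::reduce(thrust::device, d.begin(), d.end());

    // ... independent host work can overlap with the reduction here ...

    int sum = fut.get();   // synchronize with completion and fetch the value
    std::printf("sum = %d\n", sum);
    return 0;
}</pre>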
                  </div>
                  <div class="topic concept nested1" id="thrust-new-features"><a name="thrust-new-features" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#thrust-new-features" name="thrust-new-features" shape="rect">3.1.&nbsp;New Features</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li liexpand">
                              <p dir="ltr" class="p" id="thrust-new-features__docs-internal-guid-fec1b1c9-7fff-3d9f-1e07-024dfb751eac"><a name="thrust-new-features__docs-internal-guid-fec1b1c9-7fff-3d9f-1e07-024dfb751eac" shape="rect">
                                    <!-- --></a><samp class="ph codeph">thrust::event</samp> and <samp class="ph codeph">thrust::future&lt;T&gt;</samp>,
                                 					uniquely-owned asynchronous handles consisting of a state (ready or not ready),
                                 					content (some value; for <samp class="ph codeph">thrust::future</samp> only), and an optional set
                                 					of objects that should be destroyed only when the future's value is ready and has
                                 					been consumed.
                              </p><a name="thrust-new-features__ul_iqn_hhz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_iqn_hhz_vgb">
                                 <li class="li liexpand" dir="ltr">The design is loosely based on C++11's
                                    						<samp class="ph codeph">std::future</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr">They can be <samp class="ph codeph">.wait</samp>'d on, and the value of a future can
                                    						be waited on and retrieved with .get or .extract.
                                 </li>
                                 <li class="li liexpand" dir="ltr">Multiple <samp class="ph codeph">thrust::event</samp>s and
                                    							<samp class="ph codeph">thrust::future</samp>s can be combined with
                                    							<samp class="ph codeph">thrust::when_all</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::future</samp>s can be converted to
                                    							<samp class="ph codeph">thrust::event</samp>s.
                                 </li>
                                 <li class="li liexpand" dir="ltr">Currently, these primitives are only implemented for the CUDA backend
                                    						and are C++11 only.
                                 </li>
                              </ul>
                           </li>
                           <li class="li liexpand">New asynchronous algorithms that return
                              					<samp class="ph codeph">thrust::event</samp>/<samp class="ph codeph">thrust::future</samp>s, implemented as C++20
                              				range style customization points:<a name="thrust-new-features__ul_h3x_f3z_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_h3x_f3z_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::async::reduce</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::async::reduce_into</samp>, which takes a target location to
                                    						store the reduction result into.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph"> thrust::async::copy</samp>, including a two-policy overload
                                    						that allows explicit cross system copies which execution policy properties can be
                                    						attached to.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph"> thrust::async::transform</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::async::for_each</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::async::stable_sort</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::async::sort</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr">By default the asynchronous algorithms use the new caching allocators.
                                    						Deallocation of temporary storage is deferred until the destruction of the
                                    						returned <samp class="ph codeph">thrust::future</samp>. The content of
                                    							<samp class="ph codeph">thrust::future</samp>s is stored in either device or universal memory
                                    						and transferred to the host only upon request to prevent unnecessary data
                                    						migration.
                                 </li>
                                 <li class="li liexpand" dir="ltr">Asynchronous algorithms are currently only implemented for the CUDA
                                    						system and are C++11 only.
                                 </li>
                              </ul>
                           </li>
                           <li class="li liexpand"><samp class="ph codeph">exec.after(f, g, ...)</samp>, a new execution policy method that takes a set
                              				of <samp class="ph codeph">thrust::event</samp>/<samp class="ph codeph">thrust::future</samp>s and returns an
                              				execution policy that operations on that execution policy should depend upon.
                           </li>
                           <li class="li liexpand">New logic and mindset for the type requirements for cross-system sequence copies
                              				(currently only used by <samp class="ph codeph">thrust::async::copy</samp>), based on:<a name="thrust-new-features__ul_y4g_s3z_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_y4g_s3z_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::is_contiguous_iterator</samp> and
                                    						THRUST_PROCLAIM_CONTIGUOUS_ITERATOR for detecting/indicating that an iterator
                                    						points to contiguous storage.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::is_trivially_relocatable</samp> and
                                    						THRUST_PROCLAIM_TRIVIALLY_RELOCATABLE for detecting/indicating that a type is
                                    							<samp class="ph codeph">memcpy</samp>-able (based on principles from <a class="xref" href="https://wg21.link/P1144" target="_blank" shape="rect"><u class="ph u">https://wg21.link/P1144</u></a> ).
                                 </li>
                                 <li class="li liexpand" dir="ltr">The new approach reduces buffering, increases performance, and
                                    						increases correctness.
                                 </li>
                                 <li class="li liexpand" dir="ltr">The fast path is now enabled when copying fp16 and CUDA vector types
                                    						with <samp class="ph codeph">thrust::async::copy</samp>.
                                 </li>
                              </ul>
                           </li>
                           <li class="li liexpand">All Thrust synchronous algorithms for the CUDA backend now actually synchronize.
                              				Previously, any algorithm that did not allocate temporary storage (counterexample:
                              					<samp class="ph codeph">thrust::sort</samp>) and did not have a computation-dependent result
                              				(counterexample: <samp class="ph codeph">thrust::reduce</samp>) would actually be launched
                              				asynchronously. 
                              <p dir="ltr" class="p">Additionally, synchronous algorithms that allocated
                                 					temporary storage would become asynchronous if a custom allocator was supplied that
                                 					did not synchronize on allocation/deallocation, unlike <samp class="ph codeph">cudaMalloc</samp> /
                                 						<samp class="ph codeph">cudaFree</samp>. So, now <samp class="ph codeph">thrust::for_each</samp>,
                                 						<samp class="ph codeph">thrust::transform</samp>, <samp class="ph codeph">thrust::sort</samp>, etc are truly
                                 					synchronous. 
                              </p>
                              <p dir="ltr" class="p">In some cases this may be a performance regression; if
                                 					you need asynchrony, use the new asynchronous algorithms.
                              </p>
                           </li>
                           <li class="li liexpand">Thrust's allocator framework has been rewritten. It now uses a memory resource system,
                              				similar to C++17's <samp class="ph codeph">std::pmr</samp> but supporting static polymorphism. Memory
                              				resources are objects that allocate untyped storage and allocators are cheap handles to
                              				memory resources in this new model. The new facilities live in <samp class="ph codeph">&lt;thrust/mr/*&gt;
                                 					</samp>.<a name="thrust-new-features__ul_zfr_hjz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_zfr_hjz_vgb">
                                 <li class="li liexpand" dir="ltr"> t<samp class="ph codeph">hrust::mr::memory_resource&lt;Pointer&gt;</samp>, the memory
                                    						resource base class, which takes a (possibly tagged) pointer to void type as a
                                    						parameter.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::mr::allocator&lt;T, MemoryResource&gt;</samp>, an allocator backed
                                    						by a memory resource object.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::mr::polymorphic_adaptor_resource&lt;Pointer&gt;</samp>, a
                                    						type-erased memory resource adaptor.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::mr::polymorphic_allocator&lt;T&gt;</samp>, a C++17-style
                                    						polymorphic allocator backed by a type-erased memory resource object.
                                 </li>
                                 <li class="li liexpand" dir="ltr">New tunable C++17-style caching memory resources,
                                    							<samp class="ph codeph">thrust::mr::(disjoint_)?(un)?synchronized_pool_resource</samp>,
                                    						designed to cache both small object allocations and large repetitive temporary
                                    						allocations. The disjoint variants use separate storage for management of the
                                    						pool, which is necessary if the memory being allocated cannot be accessed on the
                                    						host (e.g. device memory).
                                 </li>
                                 <li class="li liexpand" dir="ltr">System-specific allocators were rewritten to use the new memory
                                    						resource framework.
                                 </li>
                                 <li class="li liexpand" dir="ltr">New <samp class="ph codeph">thrust::device_memory_resource</samp> for allocating
                                    						device memory. 
                                 </li>
                                 <li class="li liexpand" dir="ltr">New <samp class="ph codeph">thrust::universal_memory_resource</samp> for allocating
                                    						memory that can be accessed from both the host and device (e.g.
                                    							<samp class="ph codeph">cudaMallocManaged</samp>).
                                 </li>
                                 <li class="li liexpand" dir="ltr">New <samp class="ph codeph">thrust::universal_host_pinned_memory_resource</samp> for
                                    						allocating memory that can be accessed from the host and the device but always
                                    						resides in host memory (e.g. <samp class="ph codeph">cudaMallocHost</samp>).
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::get_per_device_resource</samp> and
                                    							<samp class="ph codeph">thrust::per_device_allocator</samp>, which lazily create and retrieve
                                    						a per-device singleton memory resource.
                                 </li>
                                 <li class="li liexpand" dir="ltr">Rebinding mechanisms (<samp class="ph codeph">rebind_traits</samp> and
                                    							<samp class="ph codeph">rebind_alloc</samp> ) for
                                    						<samp class="ph codeph">thrust::allocator_traits</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::device_make_unique</samp>, a factory function for creating a
                                    							<samp class="ph codeph">std::unique_ptr</samp> to a newly allocated object in device
                                    						memory.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">&lt;thrust/detail/memory_algorithms&gt;</samp> , a C++11 implementation of
                                    						the C++17 uninitialized memory algorithms.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::allocate_unique</samp> and friends, based on the proposed
                                    							C++23<samp class="ph codeph"> std::allocate_unique</samp> (<a class="xref" href="https://wg21.link/P0211" target="_blank" shape="rect">https://wg21.link/P0211</a>).
                                 </li>
                              </ul>
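<p class="p">A small host-side sketch of the resource/allocator split described above (the header paths and the exact allocator interface are assumptions based on the 1.9.4 layout; check <samp class="ph codeph">&lt;thrust/mr/*&gt;</samp> for the authoritative definitions):</p><pre xml:space="preserve">#include &lt;thrust/mr/new_delete_resource.h&gt;
#include &lt;thrust/mr/allocator.h&gt;

int main()
{
    // A memory resource hands out untyped storage ...
    thrust::mr::new_delete_resource upstream;

    // ... and an allocator is a cheap, typed handle to a memory resource.
    thrust::mr::allocator&lt;int, thrust::mr::new_delete_resource&gt; alloc(&amp;upstream);

    int *p = alloc.allocate(1024);   // room for 1024 ints from the resource
    // ... use p ...
    alloc.deallocate(p, 1024);
    return 0;
}</pre>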
                           </li>
                           <li class="li liexpand">New type traits and metaprogramming facilities. Type traits are slowly being migrated
                              				out of<samp class="ph codeph"> thrust::detail::</samp> and <samp class="ph codeph">&lt;thrust/detail/*&gt;</samp> ;
                              				their new home will be <samp class="ph codeph">thrust::</samp> and
                              					<samp class="ph codeph">&lt;thrust/type_traits/*&gt;</samp>.<a name="thrust-new-features__ul_sjz_5jz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_sjz_5jz_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::is_execution_policy</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::is_operator_less_or_greater_function_object</samp>,
                                    						which detects <samp class="ph codeph">thrust::less</samp>, <samp class="ph codeph">thrust::greater</samp>,
                                    							<samp class="ph codeph">std::less</samp>, and <samp class="ph codeph">std::greater</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::is_operator_plus_function_object</samp>, which detects
                                    							<samp class="ph codeph">thrust::plus</samp> and <samp class="ph codeph">std::plus</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::remove_cvref(_t)?</samp>, a C++11 implementation of C++20's
                                    							<samp class="ph codeph">thrust::remove_cvref(_t)?</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::void_t</samp>, and various other new type traits.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::integer_sequence</samp> and friends, a C++11 implementation of
                                    							C++20's<samp class="ph codeph"> std::integer_sequence</samp>. 
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::conjunction</samp>, <samp class="ph codeph">thrust::disjunction</samp>, and
                                    							<samp class="ph codeph">thrust::disjunction</samp>, a C++11 implementation of C++17's logical
                                    						metafunctions.
                                 </li>
                                 <li class="li liexpand" dir="ltr">Some Thrust type traits (such as
                                    							<samp class="ph codeph">thrust::is_constructible</samp>) have been redefined in terms of
                                    						C++11's type traits when they are available.
                                 </li>
                              </ul>
                           </li>
                           <li class="li liexpand"><samp class="ph codeph">&lt;thrust/detail/tuple_algorithms.h&gt;</samp>, new <samp class="ph codeph">std::tuple</samp>
                              					algorithms:<a name="thrust-new-features__ul_hsn_hkz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_hsn_hkz_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::tuple_transform</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::tuple_for_each</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::tuple_subset</samp>.
                                 </li>
                              </ul>
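                              <p class="p">A minimal sketch of how the tuple algorithms might be used; the
                                 				<samp class="ph codeph">(tuple, callable)</samp> argument order shown for
                                 				<samp class="ph codeph">thrust::tuple_for_each</samp> is an assumption.
                              </p><pre xml:space="preserve" class="pre screen">// Sketch only: assumes tuple_for_each(tuple, callable) argument order.
#include &lt;thrust/detail/tuple_algorithms.h&gt;
#include &lt;tuple&gt;
#include &lt;iostream&gt;

int main() {
  auto t = std::make_tuple(1, 2.5, 'x');
  // Apply a callable to every element (the generic lambda requires C++14).
  thrust::tuple_for_each(t, [](auto const&amp; v) { std::cout &lt;&lt; v &lt;&lt; ' '; });
  return 0;
}</pre>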
                           </li>
                           <li class="li liexpand">Miscellaneous new <samp class="ph codeph">std::</samp>-like facilities:<a name="thrust-new-features__ul_gwd_rkz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_gwd_rkz_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::optional</samp>, a C++11 implementation of C++17's
                                    							<samp class="ph codeph">std::optional</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::addressof</samp>, an implementation of C++11's
                                    							<samp class="ph codeph">std::addressof</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::next</samp> and <samp class="ph codeph">thrust::prev</samp>, an implementation
                                    						of C++11's <samp class="ph codeph">std::next</samp> and <samp class="ph codeph">std::prev</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::square</samp>, a <samp class="ph codeph">&lt;functional&gt;</samp> style unary
                                    						function object that multiplies its argument by itself.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">&lt;thrust/limits.h&gt; </samp>and <samp class="ph codeph">thrust::numeric_limits</samp>,
                                    						a customized version of <samp class="ph codeph">&lt;limits&gt;</samp> and
                                    							<samp class="ph codeph">std::numeric_limits</samp>.
                                 </li>
                              </ul>
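                              <p class="p">For example, <samp class="ph codeph">thrust::optional</samp> can stand in for C++17's
                                 				<samp class="ph codeph">std::optional</samp> in C++11 code. A sketch; the
                                 				<samp class="ph codeph">&lt;thrust/optional.h&gt;</samp> header and the <samp class="ph codeph">thrust::nullopt</samp>
                                 				name are assumptions modeled on the standard facility.
                              </p><pre xml:space="preserve" class="pre screen">// Sketch only: header path and thrust::nullopt are assumed to mirror std::optional.
#include &lt;thrust/optional.h&gt;

// Return a value only when parsing succeeds, without resorting to sentinel values.
thrust::optional&lt;int&gt; parse_digit(char c) {
  if (c &gt;= '0' &amp;&amp; c &lt;= '9')
    return c - '0';
  return thrust::nullopt;
}

int twice_or_zero(char c) {
  auto d = parse_digit(c);
  return d ? 2 * *d : 0;   // the optional converts to bool and dereferences like a pointer
}</pre>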
                           </li>
                           <li class="li liexpand">
                              <p dir="ltr" class="p" id="thrust-new-features__docs-internal-guid-44cd20b4-7fff-fdb6-8fb7-b0634dc8721d"><a name="thrust-new-features__docs-internal-guid-44cd20b4-7fff-fdb6-8fb7-b0634dc8721d" shape="rect">
                                    <!-- --></a><samp class="ph codeph">&lt;thrust/detail/preprocessor.h&gt;</samp>, new general purpose preprocessor
                                 					facilities:
                              </p><a name="thrust-new-features__ul_idh_zkz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_idh_zkz_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_CAT[2-5]</samp>, concatenates two to five tokens.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_EXPAND(_ARGS)?</samp>, performs double expansion.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_ARITY</samp> and <samp class="ph codeph">THRUST_PP_DISPATCH</samp>, tools
                                    						for macro overloading.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_BOOL</samp>, boolean conversion.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_INC</samp> and <samp class="ph codeph">THRUST_PP_DEC</samp>,
                                    						increment/decrement.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_HEAD</samp>, a variadic macro that expands to the first
                                    						argument.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_TAIL</samp>, a variadic macro that expands to all its
                                    						arguments after the first.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_IIF</samp>, bitwise conditional.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_COMMA_IF</samp>, and <samp class="ph codeph">THRUST_PP_HAS_COMMA</samp>,
                                    						facilities for adding and detecting comma tokens.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_PP_IS_VARIADIC_NULLARY</samp>, returns true if called with a
                                    						nullary <samp class="ph codeph">__VA_ARGS__</samp> .
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_CURRENT_FUNCTION</samp>, expands to the name of the current
                                    						function.
                                 </li>
                              </ul>
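                              <p class="p">A small sketch of the token-pasting facility; the usage pattern is inferred from the macro
                                 				names listed above.
                              </p><pre xml:space="preserve" class="pre screen">// Sketch only: THRUST_PP_CAT2 pastes two tokens together.
#include &lt;thrust/detail/preprocessor.h&gt;

#define MAKE_COUNTER(n) int THRUST_PP_CAT2(counter_, n) = 0;

MAKE_COUNTER(1)   // declares `int counter_1 = 0;`
MAKE_COUNTER(2)   // declares `int counter_2 = 0;`</pre>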
                           </li>
                           <li class="li liexpand">New C++11 compatibility macros:<a name="thrust-new-features__ul_skb_3lz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_skb_3lz_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_NODISCARD</samp>, expands to <samp class="ph codeph">[[nodiscard]]</samp> when
                                    						available and the best equivalent otherwise.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_CONSTEXPR</samp>, expands to <samp class="ph codeph">constexpr</samp> when
                                    						available and the best equivalent otherwise.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_OVERRIDE</samp>, expands to <samp class="ph codeph">override</samp> when
                                    						available and the best equivalent otherwise.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_DEFAULT</samp>, expands to <samp class="ph codeph">= default;</samp> when
                                    						available and the best equivalent otherwise.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_NOEXCEPT</samp>, expands to <samp class="ph codeph">noexcept</samp> when
                                    						available and the best equivalent otherwise.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_FINAL</samp>, expands to <samp class="ph codeph">final</samp> when available
                                    						and the best equivalent otherwise.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_INLINE_CONSTANT</samp>, expands to <samp class="ph codeph">inline
                                       							constexpr</samp> when available and the best equivalent otherwise.
                                 </li>
                              </ul>
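                              <p class="p">These macros are intended as drop-in replacements for the corresponding keywords. A sketch;
                                 				the defining header is not named above, so the assumption below is that the macros become visible once a
                                 				Thrust configuration header has been included.
                              </p><pre xml:space="preserve" class="pre screen">// Sketch only: assumes the compatibility macros are visible via Thrust's config headers.
#include &lt;thrust/detail/config.h&gt;

// Expands to `[[nodiscard]] constexpr int square_int(int x) noexcept` on new
// compilers, and to the best available equivalent otherwise.
THRUST_NODISCARD THRUST_CONSTEXPR int square_int(int x) THRUST_NOEXCEPT {
  return x * x;
}</pre>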
                           </li>
                           <li class="li liexpand"><samp class="ph codeph">&lt;thrust/detail/type_deduction.h&gt;</samp>, new C++11-only type deduction
                              					helpers:<a name="thrust-new-features__ul_by3_gmz_vgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="thrust-new-features__ul_by3_gmz_vgb">
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_DECLTYPE_RETURNS*</samp>, expand to function definitions with
                                    						suitable conditional noexcept qualifiers and trailing return types.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_FWD(x)</samp>, expands to
                                    							<samp class="ph codeph">::std::forward&lt;decltype(x)&gt;(x)</samp>.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_MVCAP</samp>, expands to a lambda move capture.
                                 </li>
                                 <li class="li liexpand" dir="ltr"><samp class="ph codeph">THRUST_RETOF</samp>, expands to a <samp class="ph codeph">decltype</samp> computing
                                    						the return type of an invocable.
                                 </li>
                              </ul>
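                              <p class="p">A sketch of a perfect-forwarding wrapper built from these helpers, assuming
                                 				<samp class="ph codeph">THRUST_DECLTYPE_RETURNS</samp> supplies the conditional
                                 				<samp class="ph codeph">noexcept</samp> qualifier, trailing return type, and function body as described above.
                              </p><pre xml:space="preserve" class="pre screen">// Sketch only: assumes THRUST_DECLTYPE_RETURNS(expr) expands to
// "noexcept(noexcept(expr)) -&gt; decltype(expr) { return expr; }".
#include &lt;thrust/detail/type_deduction.h&gt;

// Forwards a single argument to a callable and forwards the result back out.
template &lt;typename F, typename T&gt;
auto apply_once(F&amp;&amp; f, T&amp;&amp; t)
THRUST_DECLTYPE_RETURNS(THRUST_FWD(f)(THRUST_FWD(t)))</pre>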
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="thrust-new-examples"><a name="thrust-new-examples" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#thrust-new-examples" name="thrust-new-examples" shape="rect">3.2.&nbsp;New Examples</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li liexpand"><samp class="ph codeph">mr_basic</samp> demonstrates how to use the new memory resource allocator
                              				system.
                           </li>
                        </ul>
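                        <p class="p">A heavily abbreviated sketch in the spirit of <samp class="ph codeph">mr_basic</samp>; the resource,
                           				allocator, and header names below are assumptions, so the shipped example remains the authoritative
                           				reference.
                        </p><pre xml:space="preserve" class="pre screen">// Sketch only: type and header names are assumptions; see the mr_basic example
// shipped with Thrust for the authoritative usage.
#include &lt;thrust/mr/new.h&gt;
#include &lt;thrust/mr/allocator.h&gt;
#include &lt;thrust/host_vector.h&gt;

int main() {
  thrust::mr::new_delete_resource resource;   // a plain new/delete-backed memory resource
  thrust::mr::allocator&lt;int, thrust::mr::new_delete_resource&gt; alloc(&amp;resource);
  thrust::host_vector&lt;int, decltype(alloc)&gt; vec(alloc);  // container draws memory from `resource`
  vec.push_back(42);
  return 0;
}</pre>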
                     </div>
                  </div>
                  <div class="topic concept nested1" id="thrust-title-other-enhancements"><a name="thrust-title-other-enhancements" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#thrust-title-other-enhancements" name="thrust-title-other-enhancements" shape="rect">3.3.&nbsp;Other Enhancements</a></h3>
                     <div class="topic concept nested2" id="thrust-tagged-pointer-enhancements"><a name="thrust-tagged-pointer-enhancements" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#thrust-tagged-pointer-enhancements" name="thrust-tagged-pointer-enhancements" shape="rect">3.3.1.&nbsp;Tagged Pointer Enhancements</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li"><a name="thrust-tagged-pointer-enhancements__ul_q32_1qz_vgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="thrust-tagged-pointer-enhancements__ul_q32_1qz_vgb">
                                    <li class="li liexpand" dir="ltr">New <samp class="ph codeph">thrust::pointer_traits</samp> specialization for
                                       							<samp class="ph codeph">void const*</samp>.
                                    </li>
                                    <li class="li liexpand" dir="ltr"><samp class="ph codeph">nullptr</samp> support to Thrust tagged pointers.
                                    </li>
                                    <li class="li liexpand" dir="ltr">New explicit operator bool for Thrust tagged pointers when using C++11
                                       						for <samp class="ph codeph">std::unique_ptr</samp> interoperability.
                                    </li>
                                    <li class="li liexpand" dir="ltr">Added <samp class="ph codeph">thrust::reinterpret_pointer_cast</samp> and
                                       							<samp class="ph codeph">thrust::static_pointer_cast</samp> for casting Thrust tagged
                                       						pointers.
                                    </li>
                                 </ul>
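                                 <p class="p">A brief sketch of the casts and the explicit <samp class="ph codeph">bool</samp> conversion; the
                                    				assumption here is that the casts follow the <samp class="ph codeph">std::*_pointer_cast</samp> convention of
                                    				naming the destination element type as a template argument.
                                 </p><pre xml:space="preserve" class="pre screen">// Sketch only: the cast helpers are assumed to be visible through the device_ptr headers
// and to take the destination element type as their template argument.
#include &lt;thrust/device_vector.h&gt;
#include &lt;thrust/device_ptr.h&gt;

void inspect(thrust::device_vector&lt;int&gt;&amp; v) {
  thrust::device_ptr&lt;int&gt; p = v.data();
  if (p) {                                       // explicit operator bool (C++11)
    auto bytes = thrust::reinterpret_pointer_cast&lt;unsigned char&gt;(p);
    (void) bytes;                                // same allocation, viewed as raw bytes
  }
}</pre>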
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="thrust-iterator-enhancements"><a name="thrust-iterator-enhancements" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#thrust-iterator-enhancements" name="thrust-iterator-enhancements" shape="rect">3.3.2.&nbsp;Iterator Enhancements</a></h3>
                        <div class="body conbody">
                           <ul class="ul">
                              <li class="li"><a name="thrust-iterator-enhancements__ul_fqp_fqz_vgb" shape="rect">
                                    <!-- --></a><ul class="ul" id="thrust-iterator-enhancements__ul_fqp_fqz_vgb">
                                    <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::iterator_system</samp> is now SFINAE friendly.
                                    </li>
                                    <li class="li liexpand" dir="ltr">Removed <samp class="ph codeph">cv</samp> qualifiers from iterator types when using
                                       							<samp class="ph codeph">thrust::iterator_system</samp>.
                                    </li>
                                    <li class="li liexpand" dir="ltr">Static assert enhancements:</li>
                                    <li class="li liexpand" dir="ltr">New <samp class="ph codeph">THRUST_STATIC_ASSERT_MSG</samp>, takes an optional string
                                       						constant to be used as the error message when possible.
                                    </li>
                                    <li class="li liexpand" dir="ltr">Update <samp class="ph codeph">THRUST_STATIC_ASSERT(_MSG)</samp> to use C++11's
                                       							<samp class="ph codeph">static_assert</samp> when it's available.
                                    </li>
                                    <li class="li liexpand" dir="ltr">Introduce a way to test for static assertions.</li>
                                 </ul>
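                                 <p class="p">As context, <samp class="ph codeph">thrust::iterator_system</samp> maps an iterator type to its execution
                                    				system tag; a minimal sketch of the basic query (the include path is assumed):
                                 </p><pre xml:space="preserve" class="pre screen">// Sketch only: the include path for the iterator metafunctions is an assumption.
#include &lt;thrust/iterator/iterator_traits.h&gt;
#include &lt;thrust/device_vector.h&gt;

using dev_iter = thrust::device_vector&lt;int&gt;::iterator;
using dev_sys  = thrust::iterator_system&lt;dev_iter&gt;::type;  // a device system tag</pre>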
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="thrust-testing-enhancements"><a name="thrust-testing-enhancements" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#thrust-testing-enhancements" name="thrust-testing-enhancements" shape="rect">3.3.3.&nbsp;Testing Enhancements</a></h3>
                        <div class="body conbody"><a name="thrust-testing-enhancements__ul_z41_lqz_vgb" shape="rect">
                              <!-- --></a><ul class="ul" id="thrust-testing-enhancements__ul_z41_lqz_vgb">
                              <li class="li liexpand" dir="ltr">Additional scalar and sequence types, including non-builtin types and vectors
                                 				with unified memory allocators, have been added to the list of types used by generic
                                 				unit tests.
                              </li>
                              <li class="li liexpand" dir="ltr">The generation of random input data has been improved to increase the range
                                 				of values used and catch more corner cases.
                              </li>
                              <li class="li liexpand" dir="ltr">New <samp class="ph codeph">truncate_to_max_representable</samp> utility for avoiding the
                                 				generation of ranges that cannot be represented by the underlying element type in
                                 				generic unit test code.
                              </li>
                              <li class="li liexpand" dir="ltr">The test driver now synchronizes with CUDA devices and check for errors after
                                 				each test, when switching devices, and after each raw kernel launch.
                              </li>
                              <li class="li liexpand" dir="ltr">The <samp class="ph codeph">warningtester</samp> uber header is now compiled with NVCC to
                                 				avoid needing to disable CUDA-specific code with the preprocessor.
                              </li>
                              <li class="li liexpand" dir="ltr">Fixed the unit test framework's <samp class="ph codeph">ASSERT_*</samp> to print
                                 					<samp class="ph codeph">char</samp>s as <samp class="ph codeph">int</samp>s.
                              </li>
                              <li class="li liexpand" dir="ltr">New <samp class="ph codeph">DECLARE_INTEGRAL_VARIABLE_UNITTEST</samp> test declaration
                                 				macro.
                              </li>
                              <li class="li liexpand" dir="ltr">New <samp class="ph codeph">DECLARE_VARIABLE_UNITTEST_WITH_TYPES_AND_NAME</samp> test
                                 				declaration macro.
                              </li>
                              <li class="li liexpand" dir="ltr"><samp class="ph codeph">thrust::system_error</samp> in the CUDA backend now print out its
                                 					<samp class="ph codeph">cudaError_t</samp> enumerator in addition to the diagnostic message.
                              </li>
                              <li class="li liexpand" dir="ltr">Stopped using conditionally signed types like <samp class="ph codeph">char</samp>.
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="thrust-resolved-issues"><a name="thrust-resolved-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#thrust-resolved-issues" name="thrust-resolved-issues" shape="rect">3.4.&nbsp;Resolved Issues</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li liexpand"> Fixed compilation error when using <samp class="ph codeph">__device__ lambdas</samp> with reduce
                              				on MSVC. 
                           </li>
                           <li class="li liexpand"> Static asserted that <samp class="ph codeph">thrust::generate</samp> / <samp class="ph codeph">thrust::fill</samp>
                              				doesn't operate on <samp class="ph codeph">const</samp> iterators. 
                           </li>
                           <li class="li liexpand"> Fixed compilation failure with <samp class="ph codeph">thrust::zip_iterator</samp> and
                              					<samp class="ph codeph">thrust::complex&lt;float&gt;</samp>. 
                           </li>
                           <li class="li liexpand">Fixed dispatch for the CUDA backend's <samp class="ph codeph">thrust::reduce</samp> to use two
                              				functions (one with the <samp class="ph codeph">pragma</samp> for disabling <samp class="ph codeph">exec</samp>
                              				checks, one with <samp class="ph codeph">THRUST_RUNTIME_FUNCTION</samp>) instead of one. This fixes a
                              				regression with device compilation that started in CUDA 9.2.
                           </li>
                           <li class="li liexpand">Added missing <samp class="ph codeph">__host__ __device__</samp> annotations to a
                              					<samp class="ph codeph">thrust::complex::operator= </samp>to satisfy GoUDA.
                           </li>
                           <li class="li liexpand">Made <samp class="ph codeph">thrust::vector_base::clear</samp> not depend on the element type being
                              				default constructible.
                           </li>
                           <li class="li liexpand">Removed flaky <samp class="ph codeph">simple_cuda_streams </samp>example.
                           </li>
                           <li class="li liexpand">Added missing <samp class="ph codeph">thrust::device_vector</samp> constructor that takes an
                              				allocator parameter.
                           </li>
                           <li class="li liexpand">Updated the <samp class="ph codeph">range_view</samp> example to not use device-side launch.
                           </li>
                           <li class="li liexpand">Ensured that sized unit tests that use <samp class="ph codeph">counting_iterator</samp> perform
                              				proper truncation.
                           </li>
                           <li class="li liexpand">Refactored questionable <samp class="ph codeph">copy_if</samp> unit tests.
                           </li>
                        </ul>
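                        <p class="p">As a sketch of the pattern referred to above, the following illustrates
                           				<samp class="ph codeph">thrust::reduce</samp> with a <samp class="ph codeph">__device__</samp> lambda; it is illustrative
                           				only and requires extended lambda support in <samp class="ph codeph">nvcc</samp>
                           				(<samp class="ph codeph">--expt-extended-lambda</samp>).
                        </p><pre xml:space="preserve" class="pre screen">// Illustrative only: the kind of code affected by the MSVC fix above.
// Compile with: nvcc --expt-extended-lambda ...
#include &lt;thrust/device_vector.h&gt;
#include &lt;thrust/reduce.h&gt;
#include &lt;thrust/execution_policy.h&gt;

int sum_with_device_lambda() {
  thrust::device_vector&lt;int&gt; v(100, 1);
  return thrust::reduce(thrust::device, v.begin(), v.end(), 0,
                        [] __device__ (int a, int b) { return a + b; });
}</pre>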
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="tegra-cuda-release-notes"><a name="tegra-cuda-release-notes" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#tegra-cuda-release-notes" name="tegra-cuda-release-notes" shape="rect">4.&nbsp;CUDA Tegra Release Notes</a></h2>
                  <div class="body conbody">
                     <div class="p">The release notes for CUDA Tegra contain only information that is specific to the
                        				following:<a name="tegra-cuda-release-notes__ul_xvm_nyt_tgb" shape="rect">
                           <!-- --></a><ul class="ul" id="tegra-cuda-release-notes__ul_xvm_nyt_tgb">
                           <li class="li liexpand">CUDA Tegra Driver, and </li>
                           <li class="li liexpand">Mobile version of other CUDA components such as: compilers, tools, libraries, and
                              					samples. 
                           </li>
                        </ul>
                     </div>
                     <p class="p">The release notes for the desktop version of CUDA also apply to CUDA Tegra. On
                        		Tegra, the CUDA Toolkit version is 10.1. 
                     </p>
                  </div>
                  <div class="topic concept nested1" id="tegra-cuda-new-features"><a name="tegra-cuda-new-features" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#tegra-cuda-new-features" name="tegra-cuda-new-features" shape="rect">4.1.&nbsp;New Features
                           </a></h3>
                     <div class="body conbody">
                        <dl class="dl">
                           <dt class="dt dlterm">CUDA Tegra Driver</dt>
                           <dd class="dd">
                              <ul class="ul">
                                 <li class="li liexpand">Support is added for GPUDirect RDMA on AGX Jetson platform. This now
                                    							enables a direct path for data exchange between the GPU and third-party peer
                                    							devices using standard features of PCI Express.
                                 </li>
                                 <li class="li liexpand">Support for Android P added.</li>
                                 <li class="li liexpand">Error Reporting enriched in CUDA on mobile RM.</li>
                                 <li class="li liexpand">Support added for Ubuntu 18.04 for the AGX Drive platform.</li>
                                 <li class="li liexpand">Resumed the support for Ubuntu 16.04 host on the AGX Jetson platform.</li>
                                 <li class="li liexpand">Performance optimizations that were previously enabled for QNX are now also
                                    							available on Linux through user-mode submits.
                                 </li>
                              </ul>
                           </dd>
                           <dt class="dt dlterm">CUDA Compiler</dt>
                           <dd class="dd">
                              <ul class="ul">
                                 <li class="li liexpand">Support added for GCC 7.3 on AGX Jetson platforms.</li>
                                 <li class="li liexpand">Support added for CLANG 6.0.2 for NVCC on Android platforms.</li>
                              </ul>
                           </dd>
                        </dl>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="tegra-cuda-known-issues"><a name="tegra-cuda-known-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#tegra-cuda-known-issues" name="tegra-cuda-known-issues" shape="rect">4.2.&nbsp;Known Issues and Limitations
                           		</a></h3>
                     <div class="body conbody">
                        <dl class="dl">
                           <dt class="dt dlterm">CUDA Tegra Driver</dt>
                           <dd class="dd">
                              <ul class="ul">
                                 <li class="li liexpand">Only the below color formats are supported for Vulkan-CUDA interoperability on
                                    							Jetson and Android:<a name="tegra-cuda-known-issues__ul_drm_kb5_tgb" shape="rect">
                                       <!-- --></a><ul class="ul" id="tegra-cuda-known-issues__ul_drm_kb5_tgb">
                                       <li class="li liexpand">VK_FORMAT_R8_UNORM </li>
                                       <li class="li liexpand">VK_FORMAT_R8_SNORM </li>
                                       <li class="li liexpand">VK_FORMAT_R8_UINT </li>
                                       <li class="li liexpand">VK_FORMAT_R8_SINT </li>
                                       <li class="li liexpand">VK_FORMAT_R8_SRGB </li>
                                       <li class="li liexpand">VK_FORMAT_R8G8_UNORM </li>
                                       <li class="li liexpand">VK_FORMAT_R8G8_SNORM </li>
                                       <li class="li liexpand">VK_FORMAT_R8G8_UINT </li>
                                       <li class="li liexpand">VK_FORMAT_R8G8_SINT </li>
                                       <li class="li liexpand">VK_FORMAT_R8G8_SRGB </li>
                                       <li class="li liexpand">VK_FORMAT_R16_UNORM </li>
                                       <li class="li liexpand">VK_FORMAT_R16_SNORM </li>
                                       <li class="li liexpand">VK_FORMAT_R16_UINT </li>
                                       <li class="li liexpand">VK_FORMAT_R16_SINT </li>
                                       <li class="li liexpand">VK_FORMAT_R16_SFLOAT 
                                          <p class="p">Other formats are currently not
                                             									supported.
                                          </p>
                                       </li>
                                    </ul>
                                 </li>
                              </ul>
                           </dd>
                        </dl>
                        <dl class="dl">
                           <dt class="dt dlterm">CUDA Tools</dt>
                           <dd class="dd"><a name="tegra-cuda-known-issues__ul_ek3_dc5_tgb" shape="rect">
                                 <!-- --></a><ul class="ul" id="tegra-cuda-known-issues__ul_ek3_dc5_tgb">
                                 <li class="li liexpand">On NVIDIA DRIVE OS Linux systems, when using Nsight Compute CLI with
                                    								"<samp class="ph codeph">--mode attach</samp>" to attach to another process on the same
                                    							machine, "-<samp class="ph codeph">-hostname 127.0.0.1</samp>" must be passed. This is
                                    							because the default value of <samp class="ph codeph">"localhost"</samp> for the
                                    								<samp class="ph codeph">"--hostname"</samp> parameter does not work. 
                                 </li>
                              </ul>
                           </dd>
                        </dl>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="tegra-cuda-resolved-issues"><a name="tegra-cuda-resolved-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#tegra-cuda-resolved-issues" name="tegra-cuda-resolved-issues" shape="rect">4.3.&nbsp;Resolved Issues
                           		</a></h3>
                     <div class="body conbody">
                        <dl class="dl">
                           <dt class="dt dlterm">General CUDA</dt>
                           <dd class="dd">
                              <ul class="ul">
                                 <li class="li liexpand"><strong class="ph b">CUDA-GDB on Linux:</strong> The <samp class="ph codeph">set cuda memcheck on</samp> command in
                                    							cuda-gdb does not have any effect. This is fixed in CUDA 10.1.
                                 </li>
                                 <li class="li liexpand"><strong class="ph b">CUDA-GDB on QNX:</strong><samp class="ph codeph">ntoaarch64-gdb</samp> and <samp class="ph codeph">cuda-qnx-gdb</samp> may hang when
                                    							executing the run command. This is fixed in CUDA 10.1.
                                 </li>
                              </ul>
                           </dd>
                        </dl>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="tegra-cuda-deprecated-issues"><a name="tegra-cuda-deprecated-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#tegra-cuda-deprecated-issues" name="tegra-cuda-deprecated-issues" shape="rect">Deprecated Issues
                           		</a></h3>
                     <div class="body conbody">
                        <dl class="dl">
                           <dt class="dt dlterm">General CUDA</dt>
                           <dd class="dd">
                              <ul class="ul">
                                 <li class="li liexpand">Deprecating support for Pascal product on Android. </li>
                              </ul>
                           </dd>
                        </dl>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="acknowledgments"><a name="acknowledgments" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#acknowledgments" name="acknowledgments" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Acknowledgments</h3>
                           <p class="p">NVIDIA extends thanks to Professor Mike Giles of Oxford University for providing the
                              initial code for the optimized version of the device implementation of the
                              double-precision <samp class="ph codeph">exp()</samp> function found in this release of the CUDA
                              toolkit. 
                           </p>
                           <p class="p">NVIDIA acknowledges Scott Gray for his work on small-tile GEMM kernels for Pascal.
                              These kernels were originally developed for OpenAI and included since cuBLAS
                              8.0.61.2.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                               from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright-past-to-present" name="copyright-past-to-present" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2007</span>-<span class="ph">2019</span> NVIDIA
                              Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script><script type="text/javascript">_satellite.pageBottom();</script></body>
</html>