Sophie

Sophie

distrib > Mageia > 7 > x86_64 > media > nonfree-updates > by-pkgid > b86a85131cc739c1c53d0b55840a4328 > files > 3628

nvidia-cuda-toolkit-devel-10.1.168-1.2.mga7.nonfree.x86_64.rpm

<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="NVRTC"></meta>
      <meta name="abstract" content="The User guide to NVRTC."></meta>
      <meta name="DC.Coverage" content="CUDA API References"></meta>
      <meta name="DC.subject" content="CUDA Runtime Compilation, CUDA NVRTC, NVRTC"></meta>
      <meta name="keywords" content="CUDA Runtime Compilation, CUDA NVRTC, NVRTC"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <meta name="description" content=""></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>NVRTC (Runtime Compilation) :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="http://docs.nvidia.com/cuda/nvrtc/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit 
                  
                  
                  v10.1.168</a></div>
            <div class="category"><a href="index.html" title="NVRTC (Runtime Compilation)">NVRTC (Runtime Compilation)</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#getting-started">2.&nbsp;Getting Started</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#system-requirements">2.1.&nbsp;System Requirements</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#installation">2.2.&nbsp;Installation</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#modules">3.&nbsp;User Interface</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#group__error">3.1.&nbsp;Error Handling</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#group__query">3.2.&nbsp;General Information Query</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#group__compilation">3.3.&nbsp;Compilation</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#group__options">3.4.&nbsp;Supported Compile Options</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#group__hosthelper">3.5.&nbsp;Host Helper</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#language">4.&nbsp;Language</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#execution-space">4.1.&nbsp;Execution Space</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#separate-compilation">4.2.&nbsp;Separate Compilation</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#dynamic-parallelism">4.3.&nbsp;Dynamic Parallelism</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#integer-size">4.4.&nbsp;Integer Size</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#predefined-macros">4.5.&nbsp;Predefined Macros</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#predefined-types">4.6.&nbsp;Predefined Types</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#builtin-functions">4.7.&nbsp;Builtin Functions</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#basic-usage">5.&nbsp;Basic Usage</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#accessing-lowered-names">6.&nbsp;Accessing Lowered Names</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#accessing-lowered-names-intro">6.1.&nbsp;Introduction</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#accessing-lowered-names-example">6.2.&nbsp;Example</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#accessing-lowered-names-notes">6.3.&nbsp;Notes</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#host-template-interface">7.&nbsp;Interfacing With Template Host Code</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#host-template-interface-intro">7.1.&nbsp;Introduction</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#host-template-interface-example">7.2.&nbsp;Example</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#example-saxpy">A.&nbsp;Example: SAXPY</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#code-saxpy-cpp">A.1.&nbsp;Code (saxpy.cpp)</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#build-instruction">A.2.&nbsp;Build Instruction</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#example-lowered-name">B.&nbsp;Example: Using Lowered Name</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#code-lowered-name-cpp">B.1.&nbsp;Code (lowered-name.cpp)</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#lowered-name-build-instruction">B.2.&nbsp;Build Instruction</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#example-host-type-name">C.&nbsp;Example: Using nvrtcGetTypeName</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#code-host-type-name-cpp">C.1.&nbsp;Code (host-type-name.cpp)</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#host-type-name-build-instruction">C.2.&nbsp;Build Instruction</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#example-dynamic-parallelism">D.&nbsp;Example: Dynamic Parallelism</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#code-dynamic-parallelism-cpp">D.1.&nbsp;Code (dynamic-parallelism.cpp)</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#dynamic-parallelism-build-instruction">D.2.&nbsp;Build Instruction</a></div>
                     </li>
                  </ul>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">NVRTC (Runtime Compilation)
                  (<a href="../../pdf/NVRTC_User_Guide.pdf">PDF</a>)
                  -
                   
                  
                  
                  v10.1.168
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated April 24, 2019
                  -
                  <a href="mailto:CUDAIssues@nvidia.com?subject=CUDA%20Toolkit%20Documentation%20Feedback:%20NVRTC%20(Runtime%20Compilation)">Send Feedback</a></div>
            </div>
            <article id="contents">
               <div class="topic nested1" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="topictitle2">NVRTC</h2>
                  <div class="body conbody">
                     <p class="shortdesc">The user guide for NVRTC.</p>
                  </div>
               </div>
               <div class="topic concept nested0" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="topictitle2">1.&nbsp;Introduction</h2>
                  <div class="body conbody">
                     <p class="p">
                        NVRTC is a runtime compilation library for CUDA C++.
                        It accepts CUDA C++ source code in character string form and creates
                        handles that can be used to obtain the PTX.
                        The PTX string generated by NVRTC can be loaded by
                        <a class="xref" href="http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MODULE.html#group__CUDA__MODULE_1g04ce266ce03720f479eab76136b90c0b" target="_blank" shape="rect">cuModuleLoadData</a>
                        and
                        <a class="xref" href="http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MODULE.html#group__CUDA__MODULE_1g9e8047e9dbf725f0cd7cafd18bfd4d12" target="_blank" shape="rect">cuModuleLoadDataEx</a>,
                        and linked with other modules by
                        <a class="xref" href="http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MODULE.html#group__CUDA__MODULE_1g3ebcd2ccb772ba9c120937a2d2831b77" target="_blank" shape="rect">cuLinkAddData</a>
                        of the CUDA Driver API.
                        This facility can often provide optimizations and performance not
                        possible in a purely offline static compilation.
                        
                     </p>
                     <div class="p">
                        Without NVRTC (or any other runtime compilation support in CUDA),
                        users who want runtime compilation in their applications or
                        libraries have to spawn a separate process to execute nvcc at
                        runtime. Unfortunately, this approach has the following
                        drawbacks:
                        
                        <ul class="ul">
                           <li class="li">
                              The compilation overhead tends to be higher than necessary, and
                              
                           </li>
                           <li class="li">
                              End users are required to install nvcc and related tools, which makes
                              it complicated to distribute applications that use runtime
                              compilation.
                              
                           </li>
                        </ul>
                     </div>
                     <p class="p">
                        NVRTC addresses these issues by providing a library interface that
                        eliminates overhead associated with spawning separate processes, disk
                        I/O, etc., while keeping application deployment simple.
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="getting-started"><a name="getting-started" shape="rect">
                     <!-- --></a><h2 class="topictitle2">2.&nbsp;Getting Started</h2>
                  <div class="topic concept nested1" id="system-requirements"><a name="system-requirements" shape="rect">
                        <!-- --></a><h3 class="topictitle3">2.1.&nbsp;System Requirements</h3>
                     <div class="body conbody">
                        <div class="p">
                           NVRTC requires the following system configuration:
                           
                           <ul class="ul">
                              <li class="li">
                                 Operating System: Linux x86_64, Linux ppc64le, Linux aarch64,
                                 Windows x86_64, or Mac OS X.
                                 
                              </li>
                              <li class="li">GPU: Any GPU with CUDA Compute Capability 2.0 or higher.</li>
                              <li class="li">CUDA Toolkit and Driver.</li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="installation"><a name="installation" shape="rect">
                        <!-- --></a><h3 class="topictitle3">2.2.&nbsp;Installation</h3>
                     <div class="body conbody">
                        <div class="p">
                           NVRTC is part of the CUDA Toolkit release and the components are
                           organized as follows in the CUDA toolkit installation directory:
                           
                           <ul class="ul">
                              <li class="li">
                                 On Windows:
                                 
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">include\nvrtc.h</samp></li>
                                    <li class="li"><samp class="ph codeph">bin\nvrtc64_<span class="keyword">Major Release Version</span><span class="keyword">Minor Release Version</span>.dll</samp></li>
                                    <li class="li"><samp class="ph codeph">bin\nvrtc-builtins64_<span class="keyword">Major Release Version</span><span class="keyword">Minor Release Version</span>.dll</samp></li>
                                    <li class="li"><samp class="ph codeph">lib\x64\nvrtc.lib</samp></li>
                                    <li class="li"><samp class="ph codeph">doc\pdf\NVRTC_User_Guide.pdf</samp></li>
                                 </ul>
                              </li>
                              <li class="li">
                                 On Linux:
                                 
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">include/nvrtc.h</samp></li>
                                    <li class="li"><samp class="ph codeph">lib64/libnvrtc.so</samp></li>
                                    <li class="li"><samp class="ph codeph">lib64/libnvrtc.so.<span class="keyword">Major Release Version</span>.<span class="keyword">Minor Release Version</span></samp></li>
                                    <li class="li"><samp class="ph codeph">lib64/libnvrtc.so.<span class="keyword">Major Release Version</span>.<span class="keyword">Minor Release Version</span>.&lt;build version&gt;</samp></li>
                                    <li class="li"><samp class="ph codeph">lib64/libnvrtc-builtins.so</samp></li>
                                    <li class="li"><samp class="ph codeph">lib64/libnvrtc-builtins.so.<span class="keyword">Major Release Version</span>.<span class="keyword">Minor Release Version</span></samp></li>
                                    <li class="li"><samp class="ph codeph">lib64/libnvrtc-builtins.so.<span class="keyword">Major Release Version</span>.<span class="keyword">Minor Release Version</span>.&lt;build version&gt;</samp></li>
                                    <li class="li"><samp class="ph codeph">doc/pdf/NVRTC_User_Guide.pdf</samp></li>
                                 </ul>
                              </li>
                              <li class="li">
                                 On Mac OS X:
                                 
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">include/nvrtc.h</samp></li>
                                    <li class="li"><samp class="ph codeph">lib/libnvrtc.dylib</samp></li>
                                    <li class="li"><samp class="ph codeph">lib/libnvrtc.<span class="keyword">Major Release Version</span>.<span class="keyword">Minor Release Version</span>.dylib</samp></li>
                                    <li class="li"><samp class="ph codeph">lib/libnvrtc-builtins.dylib</samp></li>
                                    <li class="li"><samp class="ph codeph">lib/libnvrtc-builtins.<span class="keyword">Major Release Version</span>.<span class="keyword">Minor Release Version</span>.dylib</samp></li>
                                    <li class="li"><samp class="ph codeph">doc/pdf/NVRTC_User_Guide.pdf</samp></li>
                                 </ul>
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic reference nested0" id="modules"><a name="modules" shape="rect">
                     <!-- --></a><h2 class="topictitle2">3.&nbsp;User Interface</h2>
                  <div class="body refbody">
                     <div class="section">
                        <p class="p">
                           This chapter presents the API of NVRTC.
                           Basic usage of the API is explained in
                           <a class="xref" href="index.html#basic-usage" shape="rect">Basic Usage</a>.
                           Note that the API may change in the production release based on user
                           feedback.
                           
                        </p>
                     </div>
                     <div class="section">
                        <ul class="ul">
                           <li class="li cpp_specialisation"><a class="xref" href="index.html#group__error" shape="rect">Error Handling</a></li>
                           <li class="li cpp_specialisation"><a class="xref" href="index.html#group__query" shape="rect">General Information Query</a></li>
                           <li class="li cpp_specialisation"><a class="xref" href="index.html#group__compilation" shape="rect">Compilation</a></li>
                           <li class="li cpp_specialisation"><a class="xref" href="index.html#group__options" shape="rect">Supported Compile Options</a></li>
                           <li class="li cpp_specialisation"><a class="xref" href="index.html#group__hosthelper" shape="rect">Host Helper</a></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic reference apiRef apiPackage cppModule" id="group__error"><a name="group__error" shape="rect">
                        <!-- --></a><h3 class="topictitle3 cppModule">3.1.&nbsp;Error Handling</h3>
                     <div class="section">
                        <p>NVRTC defines the following enumeration type and function for API call error handling. </p>
                     </div>
                     <h4 class="fake_sectiontitle member_header">Enumerations</h4>
                     <dl class="members">
                        <dt><span class="member_type">enum&nbsp;</span><span class="member_name"><a href="#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" shape="rect">nvrtcResult</a></span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call
                              result. </span></dd>
                     </dl>
                     <h4 class="fake_sectiontitle member_header">Functions</h4>
                     <dl class="members">
                        <dt><span class="member_type">const char*&nbsp;</span><span class="member_name"><a href="#group__error_1g6aee73d771252bb7ddae9dd6626463c7" shape="rect">nvrtcGetErrorString</a> (  <a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a><span>&nbsp;</span><span class="keyword keyword apiItemName">result</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetErrorString is a helper function that returns a string describing the given nvrtcResult code; for example, it maps NVRTC_SUCCESS
                              to <tt class="ph tt code">"NVRTC_SUCCESS"</tt>. For unrecognized enumeration values, it returns <tt class="ph tt code">"NVRTC_ERROR unknown"</tt>. </span></dd>
                     </dl>
                     <div class="description">
                        <h4 class="sectiontitle">Enumerations</h4>
                        <dl class="description">
                           <dt class="description"><a name="group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" id="group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" shape="rect">
                                 <!-- --></a><span>enum nvrtcResult</span></dt>
                           <dd class="description">
                              <div class="section">
                                 <p></p>
                              </div>
                              <div class="enum-members">
                                 <h6 class="enumerator_header">
                                    Values
                                    
                                 </h6>
                                 <dl class="enumerator">
                                    <dt><span class="enum-member-name-def">NVRTC_SUCCESS = <span class="ph ph apiData">0</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_OUT_OF_MEMORY = <span class="ph ph apiData">1</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_PROGRAM_CREATION_FAILURE = <span class="ph ph apiData">2</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_INVALID_INPUT = <span class="ph ph apiData">3</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_INVALID_PROGRAM = <span class="ph ph apiData">4</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_INVALID_OPTION = <span class="ph ph apiData">5</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_COMPILATION = <span class="ph ph apiData">6</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = <span class="ph ph apiData">7</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = <span class="ph ph apiData">8</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = <span class="ph ph apiData">9</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = <span class="ph ph apiData">10</span></span></dt>
                                    <dd></dd>
                                    <dt><span class="enum-member-name-def">NVRTC_ERROR_INTERNAL_ERROR = <span class="ph ph apiData">11</span></span></dt>
                                    <dd></dd>
                                 </dl>
                              </div>
                           </dd>
                        </dl>
                     </div>
                     <div class="description">
                        <h4 class="sectiontitle">Functions</h4>
                        <dl class="description">
                           <dt class="description"><a name="group__error_1g6aee73d771252bb7ddae9dd6626463c7" id="group__error_1g6aee73d771252bb7ddae9dd6626463c7" shape="rect">
                                 <!-- --></a><span>const char* nvrtcGetErrorString (  <a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a><span>&nbsp;</span><span class="keyword keyword apiItemName">result</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetErrorString is a helper function that returns a string describing the given nvrtcResult code; for example, it maps NVRTC_SUCCESS
                                 to <tt class="ph tt code">"NVRTC_SUCCESS"</tt>. For unrecognized enumeration values, it returns <tt class="ph tt code">"NVRTC_ERROR unknown"</tt>. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">result</span></tt></dt>
                                    <dd>CUDA Runtime Compilation API result code. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">Message string for the given <a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">nvrtcResult</a> code. 
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                        </dl>
                     </div>
                  </div>
                  <div class="topic reference apiRef apiPackage cppModule" id="group__query"><a name="group__query" shape="rect">
                        <!-- --></a><h3 class="topictitle3 cppModule">3.2.&nbsp;General Information Query</h3>
                     <div class="section">
                        <p>NVRTC defines the following function for general information query. </p>
                     </div>
                     <h4 class="fake_sectiontitle member_header">Functions</h4>
                     <dl class="members">
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__query_1g7492ae22a599fc3a7cd654915b91d790" shape="rect">nvrtcVersion</a> (  int*<span>&nbsp;</span><span class="keyword keyword apiItemName">major</span>, int*<span>&nbsp;</span><span class="keyword keyword apiItemName">minor</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcVersion sets the output parameters <tt class="ph tt code">major</tt> and <tt class="ph tt code">minor</tt> to the CUDA Runtime Compilation major and minor version numbers, respectively. </span></dd>
                     </dl>
                     <div class="description">
                        <h4 class="sectiontitle">Functions</h4>
                        <dl class="description">
                           <dt class="description"><a name="group__query_1g7492ae22a599fc3a7cd654915b91d790" id="group__query_1g7492ae22a599fc3a7cd654915b91d790" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcVersion (  int*<span>&nbsp;</span><span class="keyword keyword apiItemName">major</span>, int*<span>&nbsp;</span><span class="keyword keyword apiItemName">minor</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcVersion sets the output parameters <tt class="ph tt code">major</tt> and <tt class="ph tt code">minor</tt> with the CUDA Runtime Compilation version number. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">major</span></tt></dt>
                                    <dd>CUDA Runtime Compilation major version number. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">minor</span></tt></dt>
                                    <dd>CUDA Runtime Compilation minor version number. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                        </dl>
                     </div>
                  </div>
                  <div class="topic reference apiRef apiPackage cppModule" id="group__compilation"><a name="group__compilation" shape="rect">
                        <!-- --></a><h3 class="topictitle3 cppModule">3.3.&nbsp;Compilation</h3>
                     <div class="section">
                        <p>NVRTC defines the following type and functions for actual compilation. </p>
                     </div>
                     <h4 class="fake_sectiontitle member_header">Typedefs</h4>
                     <dl class="members">
                        <dt><span class="member_type">typedef _nvrtcProgram * &nbsp;</span><span class="member_name"><a href="#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" shape="rect">nvrtcProgram</a></span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcProgram is the unit of compilation, and an opaque handle for a program. </span></dd>
                     </dl>
                     <h4 class="fake_sectiontitle member_header">Functions</h4>
                     <dl class="members">
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1g0b07e4173b28a10682f21edc7140844e" shape="rect">nvrtcAddNameExpression</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">name_expression</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcAddNameExpression notes the given name expression denoting the address of a __global__ function or __device__/__constant__
                              variable. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1g1f3136029db1413e362154b567297e8b" shape="rect">nvrtcCompileProgram</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, int <span>&nbsp;</span><span class="keyword keyword apiItemName">numOptions</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">options</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcCompileProgram compiles the given program. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1g9ae65f68911d1cf0adda2af4ad8cb458" shape="rect">nvrtcCreateProgram</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a>*<span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">src</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">name</span>, int <span>&nbsp;</span><span class="keyword keyword apiItemName">numHeaders</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">headers</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">includeNames</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcCreateProgram creates an instance of nvrtcProgram with the given input parameters, and sets the output parameter <tt class="ph tt code">prog</tt> with it. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1gaa237c59615b7d4f48d5b308b5c9b140" shape="rect">nvrtcDestroyProgram</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a>*<span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcDestroyProgram destroys the given program. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1g2543158bd0e25649254a9d86a81e4f50" shape="rect">nvrtcGetLoweredName</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">name_expression</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">lowered_name</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetLoweredName extracts the lowered (mangled) name for a __global__ function or __device__/__constant__ variable, and
                              updates *lowered_name to point to it. The memory containing the name is released when the NVRTC program is destroyed by nvrtcDestroyProgram.
                              The identical name expression must have been previously provided to nvrtcAddNameExpression. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1gc9a66bbbd47c256f4a8955517b3965da" shape="rect">nvrtcGetPTX</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, char*<span>&nbsp;</span><span class="keyword keyword apiItemName">ptx</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetPTX stores the PTX generated by the previous compilation of <tt class="ph tt code">prog</tt> in the memory pointed to by <tt class="ph tt code">ptx</tt>. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1gc622d6ffb6fff71e209407da19612c1a" shape="rect">nvrtcGetPTXSize</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, size_t*<span>&nbsp;</span><span class="keyword keyword apiItemName">ptxSizeRet</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetPTXSize sets <tt class="ph tt code">ptxSizeRet</tt> with the size of the PTX generated by the previous compilation of <tt class="ph tt code">prog</tt> (including the trailing <tt class="ph tt code">NULL</tt>). </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1g74c550e5cab81efbd59e4f72579edbd1" shape="rect">nvrtcGetProgramLog</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, char*<span>&nbsp;</span><span class="keyword keyword apiItemName">log</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetProgramLog stores the log generated by the previous compilation of <tt class="ph tt code">prog</tt> in the memory pointed to by <tt class="ph tt code">log</tt>. </span></dd>
                        <dt><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__compilation_1g59944bb118095ab53eec8994d056a18d" shape="rect">nvrtcGetProgramLogSize</a> (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, size_t*<span>&nbsp;</span><span class="keyword keyword apiItemName">logSizeRet</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetProgramLogSize sets <tt class="ph tt code">logSizeRet</tt> with the size of the log generated by the previous compilation of <tt class="ph tt code">prog</tt> (including the trailing <tt class="ph tt code">NULL</tt>). </span></dd>
                     </dl>
                     <div class="description">
                        <h4 class="sectiontitle">Typedefs</h4>
                        <dl class="description">
                           <!-- Typedef entry: nvrtcProgram. The empty anchor carries the id that in-page links to this typedef point at. -->
                           <dt class="description"><a name="group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" id="group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" shape="rect">
                                 <!-- --></a><span>typedef _nvrtcProgram *  nvrtcProgram</span></dt>
                           <dd class="description">
                              <div class="section">
                                 <p>nvrtcProgram is the unit of compilation, and an opaque handle for a program.  To compile a CUDA program string, an instance
                                    of nvrtcProgram must be created first with <a class="xref" href="index.html#group__compilation_1g9ae65f68911d1cf0adda2af4ad8cb458" title="nvrtcCreateProgram creates an instance of nvrtcProgram with the given input parameters, and sets the output parameter prog with it." shape="rect">nvrtcCreateProgram</a>, then compiled with <a class="xref" href="index.html#group__compilation_1g1f3136029db1413e362154b567297e8b" title="nvrtcCompileProgram compiles the given program." shape="rect">nvrtcCompileProgram</a>. 
                                 </p>
                              </div>
                           </dd>
                        </dl>
                     </div>
                     <div class="description">
                        <h4 class="sectiontitle">Functions</h4>
                        <dl class="description">
                           <!-- Function entry: nvrtcAddNameExpression (signature, parameters, return codes, description). -->
                           <dt class="description"><a name="group__compilation_1g0b07e4173b28a10682f21edc7140844e" id="group__compilation_1g0b07e4173b28a10682f21edc7140844e" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcAddNameExpression (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">name_expression</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcAddNameExpression notes the given name expression denoting the address of a __global__ function or __device__/__constant__
                                 variable. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">Parameters</h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">name_expression</span></tt></dt>
                                    <dd>constant expression denoting the address of a __global__ function or __device__/__constant__ variable. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <!-- The list is wrapped in a div because a p element may not contain a ul. -->
                                 <div class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION</a></li>
                                    </ul>
                                 </div>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p>The identical name expression string must be provided on a subsequent call to nvrtcGetLoweredName to extract the lowered name.</p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1g2543158bd0e25649254a9d86a81e4f50" title="nvrtcGetLoweredName extracts the lowered (mangled) name for a __global__ function or __device__/__constant__ variable, and updates *lowered_name to point to it. The memory containing the name is released when the NVRTC program is destroyed by nvrtcDestroyProgram. The identical name expression must have been previously provided to nvrtcAddNameExpression." shape="rect">nvrtcGetLoweredName</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <!-- Function entry: nvrtcCompileProgram. Parameters and Returns sections added so the entry matches its sibling function entries. -->
                           <dt class="description"><a name="group__compilation_1g1f3136029db1413e362154b567297e8b" id="group__compilation_1g1f3136029db1413e362154b567297e8b" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcCompileProgram (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, int <span>&nbsp;</span><span class="keyword keyword apiItemName">numOptions</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">options</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcCompileProgram compiles the given program. </div>
                              <div class="section">
                                 <h6 class="parameter_header">Parameters</h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">numOptions</span></tt></dt>
                                    <dd>Number of compiler options passed. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">options</span></tt></dt>
                                    <dd>Compiler options in the form of C string array.
                                       <tt class="ph tt code">options</tt> can be <tt class="ph tt code">NULL</tt> when <tt class="ph tt code">numOptions</tt> is 0. 
                                    </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <!-- The list is wrapped in a div because a p element may not contain a ul. -->
                                 <div class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_OUT_OF_MEMORY</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_OPTION</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_COMPILATION</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_BUILTIN_OPERATION_FAILURE</a></li>
                                    </ul>
                                 </div>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p>It supports compile options listed in <a class="xref" href="index.html#group__options" shape="rect">Supported Compile Options</a>. 
                                 </p>
                              </div>
                           </dd>
                           <!-- Function entry: nvrtcCreateProgram (signature, parameters, return codes, see-also link). -->
                           <dt class="description"><a name="group__compilation_1g9ae65f68911d1cf0adda2af4ad8cb458" id="group__compilation_1g9ae65f68911d1cf0adda2af4ad8cb458" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcCreateProgram (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a>*<span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">src</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">name</span>, int <span>&nbsp;</span><span class="keyword keyword apiItemName">numHeaders</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">headers</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">includeNames</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcCreateProgram creates an instance of nvrtcProgram with the given input parameters, and sets the output parameter <tt class="ph tt code">prog</tt> with it. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">Parameters</h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">src</span></tt></dt>
                                    <dd>CUDA program source. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">name</span></tt></dt>
                                    <dd>CUDA program name.
                                       <tt class="ph tt code">name</tt> can be <tt class="ph tt code">NULL</tt>; <tt class="ph tt code">"default_program"</tt> is used when <tt class="ph tt code">name</tt> is <tt class="ph tt code">NULL</tt>. 
                                    </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">numHeaders</span></tt></dt>
                                    <dd>Number of headers used.
                                       <tt class="ph tt code">numHeaders</tt> must be greater than or equal to 0. 
                                    </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">headers</span></tt></dt>
                                    <dd>Sources of the headers.
                                       <tt class="ph tt code">headers</tt> can be <tt class="ph tt code">NULL</tt> when <tt class="ph tt code">numHeaders</tt> is 0. 
                                    </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">includeNames</span></tt></dt>
                                    <dd>Name of each header by which it can be included in the CUDA program source.
                                       <tt class="ph tt code">includeNames</tt> can be <tt class="ph tt code">NULL</tt> when <tt class="ph tt code">numHeaders</tt> is 0. 
                                    </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <!-- The list is wrapped in a div because a p element may not contain a ul. -->
                                 <div class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_OUT_OF_MEMORY</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_PROGRAM_CREATION_FAILURE</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                    </ul>
                                 </div>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1gaa237c59615b7d4f48d5b308b5c9b140" title="nvrtcDestroyProgram destroys the given program." shape="rect">nvrtcDestroyProgram</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <!-- Function entry: nvrtcDestroyProgram (signature, parameters, return codes, see-also link). -->
                           <dt class="description"><a name="group__compilation_1gaa237c59615b7d4f48d5b308b5c9b140" id="group__compilation_1gaa237c59615b7d4f48d5b308b5c9b140" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcDestroyProgram (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a>*<span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcDestroyProgram destroys the given program. </div>
                              <div class="section">
                                 <h6 class="parameter_header">Parameters</h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <!-- The list is wrapped in a div because a p element may not contain a ul. -->
                                 <div class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                    </ul>
                                 </div>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1g9ae65f68911d1cf0adda2af4ad8cb458" title="nvrtcCreateProgram creates an instance of nvrtcProgram with the given input parameters, and sets the output parameter prog with it." shape="rect">nvrtcCreateProgram</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <dt class="description"><a name="group__compilation_1g2543158bd0e25649254a9d86a81e4f50" id="group__compilation_1g2543158bd0e25649254a9d86a81e4f50" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcGetLoweredName (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, const char*<span>&nbsp;</span><span class="keyword keyword apiItemName">name_expression</span>, const char**<span>&nbsp;</span><span class="keyword keyword apiItemName">lowered_name</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetLoweredName extracts the lowered (mangled) name for a __global__ function or __device__/__constant__ variable, and
                                 updates *lowered_name to point to it. The memory containing the name is released when the NVRTC program is destroyed by nvrtcDestroyProgram.
                                 The identical name expression must have been previously provided to nvrtcAddNameExpression. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">name_expression</span></tt></dt>
                                    <dd>constant expression denoting the address of a __global__ function or __device__/__constant__ variable. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">lowered_name</span></tt></dt>
                                    <dd>initialized by the function to point to a C string containing the lowered (mangled) name corresponding to the provided name
                                       expression. 
                                    </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1g0b07e4173b28a10682f21edc7140844e" title="nvrtcAddNameExpression notes the given name expression denoting the address of a __global__ function or __device__/__constant__ variable." shape="rect">nvrtcAddNameExpression</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <dt class="description"><a name="group__compilation_1gc9a66bbbd47c256f4a8955517b3965da" id="group__compilation_1gc9a66bbbd47c256f4a8955517b3965da" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcGetPTX (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, char*<span>&nbsp;</span><span class="keyword keyword apiItemName">ptx</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetPTX stores the PTX generated by the previous compilation of <tt class="ph tt code">prog</tt> in the memory pointed to by <tt class="ph tt code">ptx</tt>. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">ptx</span></tt></dt>
                                    <dd>Compiled result. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1gc622d6ffb6fff71e209407da19612c1a" title="nvrtcGetPTXSize sets ptxSizeRet with the size of the PTX generated by the previous compilation of prog (including the trailing NULL)." shape="rect">nvrtcGetPTXSize</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <dt class="description"><a name="group__compilation_1gc622d6ffb6fff71e209407da19612c1a" id="group__compilation_1gc622d6ffb6fff71e209407da19612c1a" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcGetPTXSize (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, size_t*<span>&nbsp;</span><span class="keyword keyword apiItemName">ptxSizeRet</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetPTXSize sets <tt class="ph tt code">ptxSizeRet</tt> with the size of the PTX generated by the previous compilation of <tt class="ph tt code">prog</tt> (including the trailing <tt class="ph tt code">NULL</tt>). 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">ptxSizeRet</span></tt></dt>
                                    <dd>Size of the generated PTX (including the trailing <tt class="ph tt code">NULL</tt>). 
                                    </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1gc9a66bbbd47c256f4a8955517b3965da" title="nvrtcGetPTX stores the PTX generated by the previous compilation of prog in the memory pointed by ptx." shape="rect">nvrtcGetPTX</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <dt class="description"><a name="group__compilation_1g74c550e5cab81efbd59e4f72579edbd1" id="group__compilation_1g74c550e5cab81efbd59e4f72579edbd1" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcGetProgramLog (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, char*<span>&nbsp;</span><span class="keyword keyword apiItemName">log</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetProgramLog stores the log generated by the previous compilation of <tt class="ph tt code">prog</tt> in the memory pointed to by <tt class="ph tt code">log</tt>. 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">log</span></tt></dt>
                                    <dd>Compilation log. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1g59944bb118095ab53eec8994d056a18d" title="nvrtcGetProgramLogSize sets logSizeRet with the size of the log generated by the previous compilation of prog (including the trailing NULL)." shape="rect">nvrtcGetProgramLogSize</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                           <dt class="description"><a name="group__compilation_1g59944bb118095ab53eec8994d056a18d" id="group__compilation_1g59944bb118095ab53eec8994d056a18d" shape="rect">
                                 <!-- --></a><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcGetProgramLogSize (  <a href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program. " shape="rect">nvrtcProgram</a><span>&nbsp;</span><span class="keyword keyword apiItemName">prog</span>, size_t*<span>&nbsp;</span><span class="keyword keyword apiItemName">logSizeRet</span> ) </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetProgramLogSize sets <tt class="ph tt code">logSizeRet</tt> with the size of the log generated by the previous compilation of <tt class="ph tt code">prog</tt> (including the trailing <tt class="ph tt code">NULL</tt>). 
                              </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">prog</span></tt></dt>
                                    <dd>CUDA Runtime Compilation program. </dd>
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">logSizeRet</span></tt></dt>
                                    <dd>Size of the compilation log (including the trailing <tt class="ph tt code">NULL</tt>). 
                                    </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_INPUT</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INVALID_PROGRAM</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p>Note that a compilation log may be generated with warnings and informative messages, even when the compilation of <tt class="ph tt code">prog</tt> succeeds.
                                 </p>
                                 <p class="p"></p>
                                 <p class="p"></p>
                                 <p class="p apiDesc_subtitle"><strong class="ph b">See also:</strong></p>
                                 <p class="p see_subsection"><a class="xref" href="index.html#group__compilation_1g74c550e5cab81efbd59e4f72579edbd1" title="nvrtcGetProgramLog stores the log generated by the previous compilation of prog in the memory pointed by log." shape="rect">nvrtcGetProgramLog</a></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                        </dl>
                     </div>
                  </div>
                  <div class="topic reference apiRef apiPackage cppModule" id="group__options"><a name="group__options" shape="rect">
                        <!-- --></a><h3 class="topictitle3 cppModule">3.4.&nbsp;Supported Compile Options</h3>
                     <div class="section">
                        <p>NVRTC supports the compile options below. Option names with two preceding dashes (<tt class="ph tt code">--</tt>) are long option names and option names with one preceding dash (<tt class="ph tt code">-</tt>) are short option names. Short option names can be used instead of long option names. When a compile option takes an argument,
                           an assignment operator (<tt class="ph tt code">=</tt>) is used to separate the compile option argument from the compile option name, e.g., <tt class="ph tt code">"--gpu-architecture=compute_30"</tt>. Alternatively, the compile option name and the argument can be specified in separate strings without an assignment operator,
                           e.g., <tt class="ph tt code">"--gpu-architecture"</tt> <tt class="ph tt code">"compute_30"</tt>. Single-character short option names, such as <tt class="ph tt code">-D</tt>, <tt class="ph tt code">-U</tt>, and <tt class="ph tt code">-I</tt>, do not require an assignment operator, and the compile option name and the argument can be present in the same string with
                           or without spaces between them. For instance, <tt class="ph tt code">"-D=&lt;def&gt;"</tt>, <tt class="ph tt code">"-D&lt;def&gt;"</tt>, and <tt class="ph tt code">"-D &lt;def&gt;"</tt> are all supported.
                        </p>
                        <p class="p">The valid compiler options are:</p>
                        <p class="p">
                           <ul class="ul">
                              <li class="li">
                                 <div class="p">Compilation targets
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--gpu-architecture=&lt;arch&gt;</tt> (<tt class="ph tt code">-arch</tt>)
                                          </p>
                                          <p class="p">
                                             Specify the name of the class of GPU architectures for which the input must be compiled.
                                          </p>
                                          <div class="p">
                                             <ul class="ul">
                                                <li class="li">
                                                   <div class="p">Valid <tt class="ph tt code">&lt;arch&gt;</tt>s:
                                                      <ul class="ul">
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_30</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_32</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_35</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_37</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_50</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_52</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_53</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_60</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_61</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_62</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_70</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_72</tt></p>
                                                         </li>
                                                         <li class="li">
                                                            <p class="p"><tt class="ph tt code">compute_75</tt></p>
                                                         </li>
                                                      </ul>
                                                   </div>
                                                </li>
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">compute_30</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                              <li class="li">
                                 <div class="p">Separate compilation / whole-program compilation
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--device-c</tt> (<tt class="ph tt code">-dc</tt>)
                                          </p>
                                          <p class="p">
                                             Generate relocatable code that can be linked with other relocatable device code. It is equivalent to <tt class="ph tt code">--relocatable-device-code=true</tt>.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--device-w</tt> (<tt class="ph tt code">-dw</tt>)
                                          </p>
                                          <p class="p">
                                             Generate non-relocatable code. It is equivalent to <tt class="ph tt code">--relocatable-device-code=false</tt>.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--relocatable-device-code={true|false}</tt> (<tt class="ph tt code">-rdc</tt>)
                                          </p>
                                          <div class="p">
                                             Enable (disable) the generation of relocatable device code.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">false</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                              <li class="li">
                                 <div class="p">Debugging support
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--device-debug</tt> (<tt class="ph tt code">-G</tt>)
                                          </p>
                                          <p class="p">
                                             Generate debug information.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--generate-line-info</tt> (<tt class="ph tt code">-lineinfo</tt>)
                                          </p>
                                          <p class="p">
                                             Generate line-number information.
                                          </p>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                              <li class="li">
                                 <div class="p">Code generation
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--maxrregcount=&lt;N&gt;</tt> (<tt class="ph tt code">-maxrregcount</tt>)
                                          </p>
                                          <p class="p">
                                             Specify the maximum number of registers that GPU functions can use. Up to a function-specific limit, a higher value will generally
                                             increase the performance of individual GPU threads that execute this function. However, because thread registers are allocated
                                             from a global register pool on each GPU, a higher value of this option will also reduce the maximum thread block size, thereby
                                             reducing the amount of thread parallelism. Hence, a good maxrregcount value is the result of a trade-off. If this option is
                                             not specified, then no maximum is assumed. A value less than the minimum number of registers required by the ABI will be raised by the
                                             compiler to the ABI minimum.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--ftz={true|false}</tt> (<tt class="ph tt code">-ftz</tt>)
                                          </p>
                                          <div class="p">
                                             When performing single-precision floating-point operations, flush denormal values to zero or preserve denormal values. <tt class="ph tt code">--use_fast_math</tt> implies <tt class="ph tt code">--ftz=true</tt>.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">false</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--prec-sqrt={true|false}</tt> (<tt class="ph tt code">-prec-sqrt</tt>)
                                          </p>
                                          <div class="p">
                                             For single-precision floating-point square root, use IEEE round-to-nearest mode or use a faster approximation. <tt class="ph tt code">--use_fast_math</tt> implies <tt class="ph tt code">--prec-sqrt=false</tt>.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">true</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--prec-div={true|false}</tt> (<tt class="ph tt code">-prec-div</tt>)
                                          </p>
                                          <div class="p">
                                             For single-precision floating-point division and reciprocals, use IEEE round-to-nearest mode or use a faster approximation.
                                             <tt class="ph tt code">--use_fast_math</tt> implies <tt class="ph tt code">--prec-div=false</tt>.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">true</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--fmad={true|false}</tt> (<tt class="ph tt code">-fmad</tt>)
                                          </p>
                                          <div class="p">
                                             Enables (disables) the contraction of floating-point multiplies and adds/subtracts into floating-point multiply-add operations
                                             (FMAD, FFMA, or DFMA). <tt class="ph tt code">--use_fast_math</tt> implies <tt class="ph tt code">--fmad=true</tt>.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">true</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--use_fast_math</tt> (<tt class="ph tt code">-use_fast_math</tt>)
                                          </p>
                                          <p class="p">
                                             Make use of fast math operations. <tt class="ph tt code">--use_fast_math</tt> implies <tt class="ph tt code">--ftz=true</tt>, <tt class="ph tt code">--prec-div=false</tt>, <tt class="ph tt code">--prec-sqrt=false</tt>, and <tt class="ph tt code">--fmad=true</tt>.
                                          </p>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                              <li class="li">
                                 <div class="p">Preprocessing
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--define-macro=&lt;def&gt;</tt> (<tt class="ph tt code">-D</tt>)
                                          </p>
                                          <div class="p"><tt class="ph tt code">&lt;def&gt;</tt> can be either <tt class="ph tt code">&lt;name&gt;</tt> or <tt class="ph tt code">&lt;name&gt;=&lt;definition&gt;</tt>.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p"><tt class="ph tt code">&lt;name&gt;</tt></p>
                                                   <p class="p">
                                                      Predefine <tt class="ph tt code">&lt;name&gt;</tt> as a macro with definition <tt class="ph tt code">1</tt>.
                                                   </p>
                                                </li>
                                                <li class="li">
                                                   <p class="p"><tt class="ph tt code">&lt;name&gt;=&lt;definition&gt;</tt></p>
                                                   <p class="p">
                                                      The contents of <tt class="ph tt code">&lt;definition&gt;</tt> are tokenized and preprocessed as if they appeared during translation phase three in a <tt class="ph tt code">#define</tt> directive. In particular, the definition will be truncated by embedded newline characters.
                                                   </p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--undefine-macro=&lt;def&gt;</tt> (<tt class="ph tt code">-U</tt>)
                                          </p>
                                          <p class="p">
                                             Cancel any previous definition of <tt class="ph tt code">&lt;def&gt;</tt>.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--include-path=&lt;dir&gt;</tt> (<tt class="ph tt code">-I</tt>)
                                          </p>
                                          <p class="p">
                                             Add the directory <tt class="ph tt code">&lt;dir&gt;</tt> to the list of directories to be searched for headers. These paths are searched after the list of headers given to <a class="xref" href="index.html#group__compilation_1g9ae65f68911d1cf0adda2af4ad8cb458" title="nvrtcCreateProgram creates an instance of nvrtcProgram with the given input parameters, and sets the output parameter prog with it." shape="rect">nvrtcCreateProgram</a>.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--pre-include=&lt;header&gt;</tt> (<tt class="ph tt code">-include</tt>)
                                          </p>
                                          <p class="p">
                                             Preinclude <tt class="ph tt code">&lt;header&gt;</tt> during preprocessing.
                                          </p>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                              <li class="li">
                                 <div class="p">Language Dialect
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--std={c++11|c++14}</tt> (<tt class="ph tt code">-std={c++11|c++14}</tt>)
                                          </p>
                                          <p class="p">
                                             Set language dialect to C++11 or C++14.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--builtin-move-forward={true|false}</tt> (<tt class="ph tt code">-builtin-move-forward</tt>)
                                          </p>
                                          <div class="p">
                                             Provide builtin definitions of <tt class="ph tt code">std::move</tt> and <tt class="ph tt code">std::forward</tt>, when C++11 language dialect is selected.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">true</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--builtin-initializer-list={true|false}</tt> (<tt class="ph tt code">-builtin-initializer-list</tt>)
                                          </p>
                                          <div class="p">
                                             Provide builtin definitions of <tt class="ph tt code">std::initializer_list</tt> class and member functions when C++11 language dialect is selected.
                                             <ul class="ul">
                                                <li class="li">
                                                   <p class="p">Default: <tt class="ph tt code">true</tt></p>
                                                </li>
                                             </ul>
                                          </div>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                              <li class="li">
                                 <div class="p">Misc.
                                    <ul class="ul">
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--disable-warnings</tt> (<tt class="ph tt code">-w</tt>)
                                          </p>
                                          <p class="p">
                                             Inhibit all warning messages.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--restrict</tt> (<tt class="ph tt code">-restrict</tt>)
                                          </p>
                                          <p class="p">
                                             Programmer assertion that all kernel pointer parameters are restrict pointers.
                                          </p>
                                       </li>
                                       <li class="li">
                                          <p class="p"><tt class="ph tt code">--device-as-default-execution-space</tt> (<tt class="ph tt code">-default-device</tt>)
                                          </p>
                                          <p class="p">
                                             Treat entities with no execution space annotation as <tt class="ph tt code">__device__</tt> entities.
                                          </p>
                                       </li>
                                    </ul>
                                 </div>
                              </li>
                           </ul>
                        </p>
                        <p class="p"></p>
                        <p class="p"></p>
                     </div>
                  </div>
                  <div class="topic reference apiRef apiPackage cppModule" id="group__hosthelper"><a name="group__hosthelper" shape="rect">
                        <!-- --></a><h3 class="topictitle3 cppModule">3.5.&nbsp;Host Helper</h3>
                     <div class="section">
                        <p>NVRTC defines the following functions for easier interaction with host code. </p>
                     </div>
                     <h4 class="fake_sectiontitle member_header">Functions</h4>
                     <dl class="members">
                        <dt><span class="template">template &lt; typename T &gt;</span><span class="member_type"><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a>&nbsp;</span><span class="member_name"><a href="#group__hosthelper_1g04c2aa82d9a33bdfe1749da1a1bce2e9" shape="rect">nvrtcGetTypeName</a> (  std::​string*<span>&nbsp;</span><span class="keyword keyword apiItemName">result</span> ) </span></dt>
                        <dd class="shortdesc"><span></span><span class="desc">nvrtcGetTypeName stores the source level name of the template type argument T in the given std::string location. </span></dd>
                     </dl>
                     <div class="description">
                        <h4 class="sectiontitle">Functions</h4>
                        <dl class="description">
                           <dt class="description"><a name="group__hosthelper_1g04c2aa82d9a33bdfe1749da1a1bce2e9" id="group__hosthelper_1g04c2aa82d9a33bdfe1749da1a1bce2e9" shape="rect">
                                 <!-- --></a><p class="template">template &lt; typename T &gt;</p><span><a href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result. " shape="rect">nvrtcResult</a> nvrtcGetTypeName (  std::​string*<span>&nbsp;</span><span class="keyword keyword apiItemName">result</span> )  [inline] </span></dt>
                           <dd class="description">
                              <div class="section">nvrtcGetTypeName stores the source level name of the template type argument T in the given std::string location. </div>
                              <div class="section">
                                 <h6 class="parameter_header">
                                    Parameters
                                    
                                 </h6>
                                 <dl class="table-display-params">
                                    <dt><tt class="code"><span class="keyword keyword apiItemName">result</span></tt></dt>
                                    <dd>pointer to std::string in which to store the type name. </dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h6 class="return_header">Returns</h6>
                                 <p class="return">
                                    <ul>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_SUCCESS</a></li>
                                       <li><a class="xref" href="index.html#group__error_1g31e41ef222c0ea75b4c48f715b3cd9f0" title="The enumerated type nvrtcResult defines API call result codes. NVRTC API functions return nvrtcResult to indicate the call result." shape="rect">NVRTC_ERROR_INTERNAL_ERROR</a></li>
                                    </ul>
                                 </p>
                              </div>
                              <div class="section">
                                 <h6 class="description_header">Description</h6>
                                 <p>This function is only provided when the macro NVRTC_GET_TYPE_NAME is defined with a non-zero value. It uses abi::__cxa_demangle
                                    or UnDecorateSymbolName function calls to extract the type name, when using gcc/clang or cl.exe compilers, respectively. If
                                    the name extraction fails, it will return NVRTC_ERROR_INTERNAL_ERROR, otherwise *result is initialized with the extracted name.
                                 </p>
                                 <p class="p"></p>
                                 <p class="p"></p>
                              </div>
                           </dd>
                        </dl>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="language"><a name="language" shape="rect">
                     <!-- --></a><h2 class="topictitle2">4.&nbsp;Language</h2>
                  <div class="body conbody">
                     <p class="p">
                        Unlike the offline nvcc compiler, NVRTC is meant for compiling only device
                        CUDA C++ code.
                        It does not accept host code or host compiler extensions in the input
                        code, unless otherwise noted.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="execution-space"><a name="execution-space" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.1.&nbsp;Execution Space</h3>
                     <div class="body conbody">
                        <p class="p">
                           NVRTC uses <samp class="ph codeph">__host__</samp> as the default execution space, and
                           it generates an error if it encounters any host code in the input.
                           That is, if the input contains entities with explicit
                           <samp class="ph codeph">__host__</samp> annotations or no execution space annotation,
                           NVRTC will emit an error.
                           <samp class="ph codeph">__host__ __device__</samp> functions are treated as device
                           functions.
                           
                        </p>
                        <p class="p">
                           NVRTC provides a compile option,
                           <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--device-as-default-execution-space</a></samp>, that enables an
                           alternative compilation mode, in which entities with no execution space
                           annotations are treated as <samp class="ph codeph">__device__</samp> entities.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="separate-compilation"><a name="separate-compilation" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.2.&nbsp;Separate Compilation</h3>
                     <div class="body conbody">
                        <p class="p">
                           NVRTC itself does not provide any linker.
                           Users can, however, use
                           <a class="xref" href="http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MODULE.html#group__CUDA__MODULE_1g3ebcd2ccb772ba9c120937a2d2831b77" target="_blank" shape="rect">cuLinkAddData</a>
                           in the CUDA Driver API to link the generated relocatable PTX code with
                           other relocatable code.
                           To generate relocatable PTX code, the compile option
                           <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--relocatable-device-code</a>=true</samp>
                           or
                           <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--device-c</a></samp>
                           is required.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="dynamic-parallelism"><a name="dynamic-parallelism" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.3.&nbsp;Dynamic Parallelism</h3>
                     <div class="body conbody">
                        <div class="p">
                           NVRTC supports dynamic parallelism under the following conditions:
                           
                           <ul class="ul">
                              <li class="li">Compilation target must be compute 35 or higher.</li>
                              <li class="li">Separate compilation must be enabled with the 
                                 <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--relocatable-device-code</a>=true</samp> or
                                 <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--device-c</a></samp>
                                 compile option.
                                 
                              </li>
                              <li class="li">
                                 Generated PTX must be linked against the CUDA device runtime
                                 (cudadevrt) library (see <a class="xref" href="index.html#separate-compilation" shape="rect">Separate Compilation</a>).
                                 
                              </li>
                           </ul>
                        </div>
                        <p class="p"><a class="xref" href="index.html#example-dynamic-parallelism" shape="rect">Example: Dynamic Parallelism</a> provides a simple example.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="integer-size"><a name="integer-size" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.4.&nbsp;Integer Size</h3>
                     <div class="body conbody">
                        <p class="p">
                           Different operating systems define integer type sizes differently.
                           Linux x86_64 and Mac OS X implement LP64, and Windows x86_64 implements
                           LLP64.
                           
                        </p>
                        <div class="tablenoborder"><a name="integer-size__table-integer-size" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="integer-size__table-integer-size" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 1. Integer sizes in bits for LLP64 and LP64</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row" valign="middle">
                                    <th class="entry" align="center" valign="middle" width="16.666666666666664%" id="d57e3135" rowspan="1" colspan="1">&nbsp;</th>
                                    <th class="entry" align="center" valign="middle" width="16.666666666666664%" id="d57e3137" rowspan="1" colspan="1"><samp class="ph codeph">short</samp></th>
                                    <th class="entry" align="center" valign="middle" width="16.666666666666664%" id="d57e3141" rowspan="1" colspan="1"><samp class="ph codeph">int</samp></th>
                                    <th class="entry" align="center" valign="middle" width="16.666666666666664%" id="d57e3145" rowspan="1" colspan="1"><samp class="ph codeph">long</samp></th>
                                    <th class="entry" align="center" valign="middle" width="16.666666666666664%" id="d57e3149" rowspan="1" colspan="1"><samp class="ph codeph">long long</samp></th>
                                    <th class="entry" align="center" valign="middle" width="16.666666666666664%" id="d57e3157" rowspan="1" colspan="1">pointers and <samp class="ph codeph">size_t</samp></th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row" valign="middle">
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3135" rowspan="1" colspan="1">LLP64</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3137" rowspan="1" colspan="1">16</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3141" rowspan="1" colspan="1">32</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3145" rowspan="1" colspan="1">32</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3149" rowspan="1" colspan="1">64</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3157" rowspan="1" colspan="1">64</td>
                                 </tr>
                                 <tr class="row" valign="middle">
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3135" rowspan="1" colspan="1">LP64</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3137" rowspan="1" colspan="1">16</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3141" rowspan="1" colspan="1">32</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3145" rowspan="1" colspan="1">64</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3149" rowspan="1" colspan="1">64</td>
                                    <td class="entry" align="center" valign="middle" width="16.666666666666664%" headers="d57e3157" rowspan="1" colspan="1">64</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">
                           NVRTC implements LP64 on Linux and Mac OS X, and LLP64 on Windows.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="predefined-macros"><a name="predefined-macros" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.5.&nbsp;Predefined Macros</h3>
                     <div class="body conbody">
                        <div class="p">
                           <ul class="ul">
                              <li class="li"><samp class="ph codeph">__CUDACC_RTC__</samp>:
                                 useful for distinguishing between runtime and offline
                                 <samp class="ph codeph">nvcc</samp> compilation in user code.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDACC__</samp>:
                                 defined with same semantics as with offline <samp class="ph codeph">nvcc</samp>
                                 compilation.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDACC_RDC__</samp>:
                                 defined with same semantics as with offline <samp class="ph codeph">nvcc</samp>
                                 compilation.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDACC_DEBUG__</samp>:
                                 defined with same semantics as with offline <samp class="ph codeph">nvcc</samp>
                                 compilation.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDA_ARCH__</samp>:
                                 defined with same semantics as with offline <samp class="ph codeph">nvcc</samp>
                                 compilation.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDACC_VER_MAJOR__</samp>:
                                 defined with the major version number as returned by
                                 <a class="xref" href="index.html#group__compilation" shape="rect">nvrtcVersion</a>.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDACC_VER_MINOR__</samp>:
                                 defined with the minor version number as returned by
                                 <a class="xref" href="index.html#group__compilation" shape="rect">nvrtcVersion</a>.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__CUDACC_VER_BUILD__</samp>:
                                 defined with the build version number.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">NULL</samp>: null pointer constant.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">va_start</samp></li>
                              <li class="li"><samp class="ph codeph">va_end</samp></li>
                              <li class="li"><samp class="ph codeph">va_arg</samp></li>
                              <li class="li"><samp class="ph codeph">va_copy</samp>: defined when language dialect C++11 or later is selected.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph">__cplusplus</samp></li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="predefined-types"><a name="predefined-types" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.6.&nbsp;Predefined Types</h3>
                     <div class="body conbody">
                        <div class="p">
                           <ul class="ul">
                              <li class="li"><samp class="ph codeph">clock_t</samp></li>
                              <li class="li"><samp class="ph codeph">size_t</samp></li>
                              <li class="li"><samp class="ph codeph">ptrdiff_t</samp></li>
                              <li class="li"><samp class="ph codeph">va_list</samp>: Note that the definition of this type may
                                 be different than the one selected by nvcc when compiling CUDA code.
                              </li>
                              <li class="li">
                                 Predefined types such as <samp class="ph codeph">dim3</samp>,
                                 <samp class="ph codeph">char4</samp>, etc., that are available in the CUDA
                                 Runtime headers when compiling offline with <samp class="ph codeph">nvcc</samp>
                                 are also available, unless otherwise noted.
                                 
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="builtin-functions"><a name="builtin-functions" shape="rect">
                        <!-- --></a><h3 class="topictitle3">4.7.&nbsp;Builtin Functions</h3>
                     <div class="body conbody">
                        <p class="p">
                           Builtin functions provided by the CUDA Runtime headers when compiling
                           offline with <samp class="ph codeph">nvcc</samp> are available, unless otherwise
                           noted.
                           
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="basic-usage"><a name="basic-usage" shape="rect">
                     <!-- --></a><h2 class="topictitle2">5.&nbsp;Basic Usage</h2>
                  <div class="body conbody">
                     <p class="p">
                        This section of the document uses a simple example,
                        <em class="ph i">Single-Precision α⋅X Plus Y</em> (SAXPY), shown in <a class="xref" href="index.html#basic-usage__cuda-source-string-for-saxpy" shape="rect">Figure 1</a> to
                        explain what is involved in runtime compilation with NVRTC.
                        For brevity and readability, error checks on the API return values are not shown.
                        The complete code listing is available in
                        <a class="xref" href="index.html#example-saxpy" shape="rect">Example: SAXPY</a>.
                        
                     </p>
                     <div class="fig fignone" id="basic-usage__cuda-source-string-for-saxpy"><a name="basic-usage__cuda-source-string-for-saxpy" shape="rect">
                           <!-- --></a><span class="figcap">Figure 1. CUDA source string for SAXPY</span><pre xml:space="preserve">const char *saxpy = "                                           \n\
extern \"C\" __global__                                         \n\
void saxpy(float a, float *x, float *y, float *out, size_t n)   \n\
{                                                               \n\
  size_t tid = blockIdx.x * blockDim.x + threadIdx.x;           \n\
  if (tid &lt; n) {                                                \n\
    out[tid] = a * x[tid] + y[tid];                             \n\
  }                                                             \n\
}                                                               \n";</pre></div>
                     <p class="p">
                        First, an instance of
                        <a class="xref" href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program." shape="rect">nvrtcProgram</a>
                        needs to be created.
                        <a class="xref" href="index.html#basic-usage__nvrtcprogram-creation-for-saxpy" shape="rect">Figure 2</a> shows
                        creation of
                        <a class="xref" href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program." shape="rect">nvrtcProgram</a>
                        for SAXPY.
                        As SAXPY does not require any header, <samp class="ph codeph">0</samp> is passed as
                        <samp class="ph codeph"><a class="xref" href="index.html#group__compilation" shape="rect">numHeaders</a></samp>,
                        and <samp class="ph codeph">NULL</samp> as
                        <samp class="ph codeph"><a class="xref" href="index.html#group__compilation" shape="rect">headers</a></samp>
                        and
                        <samp class="ph codeph"><a class="xref" href="index.html#group__compilation" shape="rect">includeNames</a></samp>.
                        
                     </p>
                     <div class="fig fignone" id="basic-usage__nvrtcprogram-creation-for-saxpy"><a name="basic-usage__nvrtcprogram-creation-for-saxpy" shape="rect">
                           <!-- --></a><span class="figcap">Figure 2. nvrtcProgram creation for SAXPY</span><pre xml:space="preserve">nvrtcProgram prog;
nvrtcCreateProgram(&amp;prog,         // prog
                   saxpy,         // buffer
                   "saxpy.cu",    // name
                   0,             // numHeaders
                   NULL,          // headers
                   NULL);         // includeNames</pre></div>
                     <p class="p">
                        If SAXPY had any <samp class="ph codeph">#include</samp> directives, the contents of
                        the files that are <samp class="ph codeph">#include</samp>'d can be passed as elements
                        of headers, and their names as elements of
                        <samp class="ph codeph"><a class="xref" href="index.html#group__compilation" shape="rect">includeNames</a></samp>.
                        For example, <samp class="ph codeph">#include &lt;foo.h&gt;</samp> and
                        <samp class="ph codeph">#include &lt;bar.h&gt;</samp> would require
                        <samp class="ph codeph">2</samp> as
                        <samp class="ph codeph"><a class="xref" href="index.html#group__compilation" shape="rect">numHeaders</a></samp>,
                        <samp class="ph codeph">{ "&lt;contents of foo.h&gt;", "&lt;contents of bar.h&gt;" }</samp>
                        as <samp class="ph codeph">headers</samp>, and <samp class="ph codeph">{ "foo.h", "bar.h" }</samp> as
                        <samp class="ph codeph"><a class="xref" href="index.html#group__compilation" shape="rect">includeNames</a></samp>
                        (<samp class="ph codeph">&lt;contents of foo.h&gt;</samp> and
                        <samp class="ph codeph">&lt;contents of bar.h&gt;</samp> must be replaced by the
                        actual contents of <samp class="ph codeph">foo.h</samp> and <samp class="ph codeph">bar.h</samp>).
                        Alternatively, the compile option <samp class="ph codeph">-I</samp> can be used if the
                        header is guaranteed to exist in the file system at runtime.
                        
                     </p>
                     <p class="p">
                        Once the instance of
                        <a class="xref" href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program." shape="rect">nvrtcProgram</a>
                        for compilation is created, it can be compiled by
                        <a class="xref" href="index.html#group__compilation" shape="rect">nvrtcCompileProgram</a>
                        as shown in
                        <a class="xref" href="index.html#basic-usage__compilation-of-saxpy-for-compute_30-with-fmad-enabled" shape="rect">Figure 3</a>.
                        Two compile options are used in this example,
                        <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--gpu-architecture=compute_30</a></samp> and
                        <samp class="ph codeph"><a class="xref" href="index.html#group__options" shape="rect">--fmad=false</a></samp>, to generate code for the
                        <samp class="ph codeph">compute_30</samp> architecture and to disable the contraction
                        of floating-point multiplies and adds/subtracts into floating-point
                        multiply-add operations.
                        Other combinations of compile options can be used as needed and
                        <a class="xref" href="index.html#group__options" shape="rect">Supported Compile Options</a> lists valid compile options.
                        
                     </p>
                     <div class="fig fignone" id="basic-usage__compilation-of-saxpy-for-compute_30-with-fmad-enabled"><a name="basic-usage__compilation-of-saxpy-for-compute_30-with-fmad-enabled" shape="rect">
                           <!-- --></a><span class="figcap">Figure 3. Compilation of SAXPY for compute_30 with FMAD disabled</span><pre xml:space="preserve">const char *opts[] = {"--gpu-architecture=compute_30",
                      "--fmad=false"};
nvrtcCompileProgram(prog,     // prog
                    2,        // numOptions
                    opts);    // options</pre></div>
                     <p class="p">
                        After the compilation completes, users can obtain the program
                        compilation log and the generated PTX as
                        <a class="xref" href="index.html#basic-usage__obtaining-generated-ptx-and-program-compilation-log" shape="rect">Figure 4</a>
                        shows.
                        NVRTC does not generate valid PTX when the compilation fails, and it may
                        generate a program compilation log even when the compilation succeeds, if
                        needed.
                        
                     </p>
                     <p class="p">
                        A
                        <a class="xref" href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program." shape="rect">nvrtcProgram</a>
                        can be compiled by
                        <a class="xref" href="index.html#group__compilation" shape="rect">nvrtcCompileProgram</a>
                        multiple times with different compile options, and users can only
                        retrieve the PTX and the log generated by the last compilation.
                        
                     </p>
                     <div class="fig fignone" id="basic-usage__obtaining-generated-ptx-and-program-compilation-log"><a name="basic-usage__obtaining-generated-ptx-and-program-compilation-log" shape="rect">
                           <!-- --></a><span class="figcap">Figure 4. Obtaining generated PTX and program compilation log</span><pre xml:space="preserve">// Obtain compilation log from the program.
size_t logSize;
nvrtcGetProgramLogSize(prog, &amp;logSize);
char *log = new char[logSize];
nvrtcGetProgramLog(prog, log);
// Obtain PTX from the program.
size_t ptxSize;
nvrtcGetPTXSize(prog, &amp;ptxSize);
char *ptx = new char[ptxSize];
nvrtcGetPTX(prog, ptx);</pre></div>
                     <p class="p">
                        When the instance of
                        <a class="xref" href="index.html#group__compilation_1ga64ec636c8e3e1e8ea10028866c4594b" title="nvrtcProgram is the unit of compilation, and an opaque handle for a program." shape="rect">nvrtcProgram</a>
                        is no longer needed,
                        it can be destroyed by
                        <a class="xref" href="index.html#group__compilation_1gaa237c59615b7d4f48d5b308b5c9b140" title="nvrtcDestroyProgram destroys the given program." shape="rect">nvrtcDestroyProgram</a>
                        as shown in
                        <a class="xref" href="index.html#basic-usage__destruction-of-nvrtcprogram" shape="rect">Figure 5</a>.
                        
                     </p>
                     <div class="fig fignone" id="basic-usage__destruction-of-nvrtcprogram"><a name="basic-usage__destruction-of-nvrtcprogram" shape="rect">
                           <!-- --></a><span class="figcap">Figure 5. Destruction of nvrtcProgram</span><pre xml:space="preserve">nvrtcDestroyProgram(&amp;prog);</pre></div>
                     <p class="p">
                        The generated PTX can be further manipulated by the CUDA Driver API for
                        execution or linking.
                        <a class="xref" href="index.html#basic-usage__execution-of-saxpy-using-the-ptx-generated-by-nvrtc" shape="rect">Figure 6</a>
                        shows an example code sequence for execution of the generated PTX.
                        
                     </p>
                     <div class="fig fignone" id="basic-usage__execution-of-saxpy-using-the-ptx-generated-by-nvrtc"><a name="basic-usage__execution-of-saxpy-using-the-ptx-generated-by-nvrtc" shape="rect">
                           <!-- --></a><span class="figcap">Figure 6. Execution of SAXPY using the PTX generated by NVRTC</span><pre xml:space="preserve">CUdevice cuDevice;
CUcontext context;
CUmodule module;
CUfunction kernel;
cuInit(0);
cuDeviceGet(&amp;cuDevice, 0);
cuCtxCreate(&amp;context, 0, cuDevice);
cuModuleLoadDataEx(&amp;module, ptx, 0, 0, 0);
cuModuleGetFunction(&amp;kernel, module, "saxpy");
size_t n = NUM_THREADS * NUM_BLOCKS;
size_t bufferSize = n * sizeof(float);
float a = ...;
float *hX = ..., *hY = ..., *hOut = ...;
CUdeviceptr dX, dY, dOut;
cuMemAlloc(&amp;dX, bufferSize);
cuMemAlloc(&amp;dY, bufferSize);
cuMemAlloc(&amp;dOut, bufferSize);
cuMemcpyHtoD(dX, hX, bufferSize);
cuMemcpyHtoD(dY, hY, bufferSize);
void *args[] = { &amp;a, &amp;dX, &amp;dY, &amp;dOut, &amp;n };
cuLaunchKernel(kernel,
               NUM_BLOCKS, 1, 1,    // grid dim
               NUM_THREADS, 1, 1,   // block dim
               0, NULL,             // shared mem and stream
               args,                // arguments
               0);
cuCtxSynchronize();
cuMemcpyDtoH(hOut, dOut, bufferSize);</pre></div>
                  </div>
               </div>
               <div class="topic concept nested0" id="accessing-lowered-names"><a name="accessing-lowered-names" shape="rect">
                     <!-- --></a><h2 class="topictitle2">6.&nbsp;Accessing Lowered Names</h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="accessing-lowered-names-intro"><a name="accessing-lowered-names-intro" shape="rect">
                        <!-- --></a><h3 class="topictitle3">6.1.&nbsp;Introduction</h3>
                     <div class="body conbody">
                        <p class="p">NVRTC will mangle <samp class="ph codeph">__global__</samp> function names and names of <samp class="ph codeph">__device__</samp>
                           and <samp class="ph codeph">__constant__</samp> variables as specified by the Itanium C++ ABI (also known as the IA64 ABI). If the generated PTX is
                           being loaded using the CUDA Driver API, the kernel function or 
                           <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variable must be looked up by name, but this is
                           hard to do when the name has been mangled. To address this problem, NVRTC provides API functions
                           that map source level <samp class="ph codeph">__global__</samp> function or <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp>
                           variable names to the mangled names present in the generated PTX. 
                        </p>
                        <p class="p">
                           The two API functions <samp class="ph codeph">nvrtcAddNameExpression</samp> and <samp class="ph codeph">nvrtcGetLoweredName</samp> work together to provide this
                           functionality. First, a 'name expression' string denoting the address for the <samp class="ph codeph">__global__</samp> function
                           or <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variable is provided to <samp class="ph codeph">nvrtcAddNameExpression</samp>. 
                           Then, the program is compiled with
                           <samp class="ph codeph">nvrtcCompileProgram</samp>. During compilation, NVRTC will parse the name expression string as a C++ constant expression at the end of the user program.
                           The constant expression
                           must provide the address of the <samp class="ph codeph">__global__</samp> function or <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variable. Finally, the 
                           function <samp class="ph codeph">nvrtcGetLoweredName</samp> is called with the original name expression and it returns a pointer to the
                           lowered name. The lowered name can be used to refer to the kernel or variable in the CUDA Driver API.
                           
                        </p>
                        <p class="p">
                           NVRTC guarantees that any <samp class="ph codeph">__global__</samp> function or <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variable referenced in a call to <samp class="ph codeph">nvrtcAddNameExpression</samp>
                           will be present in the generated PTX (if the definition is available in the input source code). 
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="accessing-lowered-names-example"><a name="accessing-lowered-names-example" shape="rect">
                        <!-- --></a><h3 class="topictitle3">6.2.&nbsp;Example</h3>
                     <div class="body conbody">
                        <p class="p"><a class="xref" href="index.html#example-lowered-name" shape="rect">Example: Using Lowered Name</a> lists a complete runnable example. Some relevant snippets:
                        </p>
                        <ol class="ol">
                           <li class="li">
                              <div class="p">The GPU source code ('gpu_program') contains definitions of various <samp class="ph codeph">__global__</samp> functions/function templates and <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variables:
                                 <pre xml:space="preserve">const char *gpu_program = "                                     \n\
__device__ int V1; // set from host code                        \n\
static __global__ void f1(int *result) { *result = V1 + 10; }   \n\
namespace N1 {                                                  \n\
  namespace N2 {                                                \n\
     __constant__ int V2; // set from host code                 \n\
    __global__ void f2(int *result) { *result = V2 + 20; }      \n\
  }                                                             \n\
}                                                               \n\
template&lt;typename T&gt;                                            \n\
__global__ void f3(int *result) { *result = sizeof(T); }        \n";
</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The host source code invokes <samp class="ph codeph">nvrtcAddNameExpression</samp> with various name expressions referring to the address of <samp class="ph codeph">__global__</samp>
                                 functions and <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variables:
                                 <pre xml:space="preserve">kernel_name_vec.push_back("&amp;f1");
..
kernel_name_vec.push_back("N1::N2::f2");
..
kernel_name_vec.push_back("f3&lt;int&gt;");
..
kernel_name_vec.push_back("f3&lt;double&gt;");

// add name expressions to NVRTC. Note this must be done before
// the program is compiled.
for (size_t i = 0; i &lt; kernel_name_vec.size(); ++i)
  NVRTC_SAFE_CALL(nvrtcAddNameExpression(prog, kernel_name_vec[i].c_str()));
..
// add expressions for  __device__ / __constant__ variables to NVRTC
variable_name_vec.push_back("&amp;V1");
..
variable_name_vec.push_back("&amp;N1::N2::V2");
..  
for (size_t i = 0; i &lt; variable_name_vec.size(); ++i)
  NVRTC_SAFE_CALL(nvrtcAddNameExpression(prog, 
  variable_name_vec[i].c_str()));
</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The GPU program is then compiled with <samp class="ph codeph">nvrtcCompileProgram</samp>. The generated PTX is loaded on the GPU.
                                 The mangled names of the <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variables and <samp class="ph codeph">__global__</samp> functions are looked up:
                                 <pre xml:space="preserve">// note: this call must be made after NVRTC program has been 
// compiled and before it has been destroyed.
NVRTC_SAFE_CALL(nvrtcGetLoweredName(
prog, 
variable_name_vec[i].c_str(), // name expression
&amp;name                         // lowered name
));
..
NVRTC_SAFE_CALL(nvrtcGetLoweredName(
prog,
kernel_name_vec[i].c_str(), // name expression
&amp;name // lowered name
));

</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The mangled name of the <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variable is then
                                 used to look up the variable in the module and update its value using the CUDA Driver API:
                                 <pre xml:space="preserve">CUdeviceptr variable_addr;
CUDA_SAFE_CALL(cuModuleGetGlobal(&amp;variable_addr, NULL, module, name));
CUDA_SAFE_CALL(cuMemcpyHtoD(variable_addr, 
&amp;initial_value, sizeof(initial_value)));
  </pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The mangled name of the kernel is then used to launch it using the CUDA Driver API:
                                 <pre xml:space="preserve">CUfunction kernel;
CUDA_SAFE_CALL(cuModuleGetFunction(&amp;kernel, module, name));
...
CUDA_SAFE_CALL(
cuLaunchKernel(kernel,
1, 1, 1, // grid dim
1, 1, 1, // block dim
0, NULL, // shared mem and stream
args, 0));    
  </pre></div>
                           </li>
                        </ol>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="accessing-lowered-names-notes"><a name="accessing-lowered-names-notes" shape="rect">
                        <!-- --></a><h3 class="topictitle3">6.3.&nbsp;Notes</h3>
                     <div class="body conbody">
                        <ol class="ol">
                           <li class="li">Sequence of calls:
                              All name expressions must be added using <samp class="ph codeph">nvrtcAddNameExpression</samp> before the NVRTC program is compiled
                              with <samp class="ph codeph">nvrtcCompileProgram</samp>. This is required because the name expressions are parsed at the end of the
                              user program, and may trigger template instantiations. The lowered names must be looked up by calling
                              <samp class="ph codeph">nvrtcGetLoweredName</samp> only after the NVRTC program has been compiled, and before it has been destroyed.
                              The pointer returned by <samp class="ph codeph">nvrtcGetLoweredName</samp> points to memory owned by NVRTC, and this memory is freed
                              when the NVRTC program has been destroyed (<samp class="ph codeph">nvrtcDestroyProgram</samp>). Thus the correct sequence of 
                              calls is: <samp class="ph codeph">nvrtcAddNameExpression</samp>, <samp class="ph codeph">nvrtcCompileProgram</samp>, <samp class="ph codeph">nvrtcGetLoweredName</samp>, <samp class="ph codeph">nvrtcDestroyProgram</samp>.
                              
                           </li>
                           <li class="li">Identical Name Expressions:
                              The name expression string passed to <samp class="ph codeph">nvrtcAddNameExpression</samp> and <samp class="ph codeph">nvrtcGetLoweredName</samp> must have identical
                              characters. For example, "foo" and "foo " are not identical strings, even though semantically they
                              refer to the same entity (foo), because the second string has an extra whitespace character.
                              
                           </li>
                           <li class="li">Constant Expressions:
                              The characters in the name expression string are parsed as a C++ constant expression
                              at the end of the user program. Any
                              errors during parsing will cause compilation failure and compiler diagnostics will be generated in 
                              the compilation log. The constant expression must refer to the address of a <samp class="ph codeph">__global__</samp> function or <samp class="ph codeph">__device__</samp>/<samp class="ph codeph">__constant__</samp> variable.
                              
                           </li>
                           <li class="li">Address of overloaded function:
                              If the NVRTC source code has multiple overloaded <samp class="ph codeph">__global__</samp> functions, then the name expression must use
                              a cast operation to disambiguate. However, casts are not allowed in constant expressions for C++ dialects
                              before C++11. If using such name expressions, please compile the code in C++11 or later dialect using the
                              '-std' command line flag.
                              Example: Consider that the GPU code string contains:
                              <pre xml:space="preserve">
__global__ void foo(int) { }
__global__ void foo(char) { }
</pre>
                              
                              The name expression <samp class="ph codeph">'(void(*)(int))foo'</samp> correctly disambiguates <samp class="ph codeph">'foo(int)'</samp>, but the program must be compiled
                              in C++11 or later dialect (e.g. <samp class="ph codeph">'-std=c++11'</samp>) because casts are not allowed in pre-C++11 constant expressions.
                              
                           </li>
                        </ol>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="host-template-interface"><a name="host-template-interface" shape="rect">
                     <!-- --></a><h2 class="topictitle2">7.&nbsp;Interfacing With Template Host Code</h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="host-template-interface-intro"><a name="host-template-interface-intro" shape="rect">
                        <!-- --></a><h3 class="topictitle3">7.1.&nbsp;Introduction</h3>
                     <div class="body conbody">
                        <p class="p">In some scenarios, it is useful to instantiate <samp class="ph codeph">__global__</samp> function templates in device code based on 
                           template arguments in host code. The NVRTC helper function <samp class="ph codeph">nvrtcGetTypeName</samp> 
                           can be used to extract the source level name of a type in host code, and this string can be used to 
                           instantiate a <samp class="ph codeph">__global__</samp> function template and get the mangled name of the instantiation using the
                           <samp class="ph codeph">nvrtcAddNameExpression</samp> and <samp class="ph codeph">nvrtcGetLoweredName</samp> functions.
                           
                        </p>
                        <p class="p"><samp class="ph codeph">nvrtcGetTypeName</samp> is defined inline in the NVRTC header file, and is available when
                           the macro <samp class="ph codeph">NVRTC_GET_TYPE_NAME</samp> is defined with a non-zero value. 
                           It uses the <samp class="ph codeph">abi::__cxa_demangle</samp> and <samp class="ph codeph">UnDecorateSymbolName</samp> host code functions 
                           when using <samp class="ph codeph">gcc/clang</samp>
                           and <samp class="ph codeph">cl.exe</samp> compilers, respectively. Users may need to specify additional header paths and libraries
                           to find the host functions used (<samp class="ph codeph">abi::__cxa_demangle</samp> / <samp class="ph codeph">UnDecorateSymbolName</samp>).
                           See the build instructions for the example below for reference (<a class="xref" href="index.html#host-type-name-build-instruction" shape="rect">Build Instruction</a>).
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="host-template-interface-example"><a name="host-template-interface-example" shape="rect">
                        <!-- --></a><h3 class="topictitle3">7.2.&nbsp;Example</h3>
                     <div class="body conbody">
                        <p class="p"><a class="xref" href="index.html#example-host-type-name" shape="rect">Example: Using nvrtcGetTypeName</a> lists a complete runnable example. Some relevant snippets:
                        </p>
                        <ol class="ol">
                           <li class="li">
                              <div class="p"> The GPU source code ('gpu_program') contains the definition of a <samp class="ph codeph">__global__</samp>
                                 function template:
                                 <pre xml:space="preserve">const char *gpu_program = " \n\
namespace N1 { struct S1_t { int i; double d; }; } \n\
template&lt;typename T&gt; \n\
__global__ void f3(int *result) { *result = sizeof(T); } \n\
\n";


</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The host code function <samp class="ph codeph">getKernelNameForType</samp> creates the
                                 name expression for a <samp class="ph codeph">__global__</samp> function template instantiation based on the
                                 host template type <samp class="ph codeph">T</samp>. The name of the type <samp class="ph codeph">T</samp> is extracted
                                 using <samp class="ph codeph">nvrtcGetTypeName</samp>:
                                 
                                 <pre xml:space="preserve">template &lt;typename T&gt;
std::string getKernelNameForType(void)
{
// Look up the source level name string for the type "T" using
// nvrtcGetTypeName() and use it to create the kernel name
std::string type_name;
NVRTC_SAFE_CALL(nvrtcGetTypeName&lt;T&gt;(&amp;type_name));
return std::string("f3&lt;") + type_name + "&gt;";
}


</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The name expressions are presented to NVRTC using the <samp class="ph codeph">nvrtcAddNameExpression</samp> function:
                                 
                                 <pre xml:space="preserve">name_vec.push_back(getKernelNameForType&lt;int&gt;());
..
name_vec.push_back(getKernelNameForType&lt;double&gt;());
..
name_vec.push_back(getKernelNameForType&lt;N1::S1_t&gt;());
..
for (size_t i = 0; i &lt; name_vec.size(); ++i)
NVRTC_SAFE_CALL(nvrtcAddNameExpression(prog, name_vec[i].c_str()));


</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The GPU program is then compiled with <samp class="ph codeph">nvrtcCompileProgram</samp>. The generated PTX is loaded on the GPU.
                                 The mangled names of the <samp class="ph codeph">__global__</samp> function template instantiations are then looked up with <samp class="ph codeph">nvrtcGetLoweredName</samp>:
                                 <pre xml:space="preserve">// note: this call must be made after NVRTC program has been
// compiled and before it has been destroyed.
NVRTC_SAFE_CALL(nvrtcGetLoweredName(
prog,
name_vec[i].c_str(), // name expression
&amp;name // lowered name
));

</pre></div>
                           </li>
                           <li class="li">
                              <div class="p"> The mangled name is then used to launch the kernel using the CUDA Driver API:
                                 <pre xml:space="preserve">CUfunction kernel;
CUDA_SAFE_CALL(cuModuleGetFunction(&amp;kernel, module, name));
...
CUDA_SAFE_CALL(
cuLaunchKernel(kernel,
1, 1, 1, // grid dim
1, 1, 1, // block dim
0, NULL, // shared mem and stream
args, 0));    
  </pre></div>
                           </li>
                        </ol>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="example-saxpy"><a name="example-saxpy" shape="rect">
                     <!-- --></a><h2 class="topictitle2">A.&nbsp;Example: SAXPY</h2>
                  <div class="topic concept nested1" id="code-saxpy-cpp"><a name="code-saxpy-cpp" shape="rect">
                        <!-- --></a><h3 class="topictitle3">A.1.&nbsp;Code (saxpy.cpp)</h3>
                     <div class="body conbody"><pre xml:space="preserve">#include &lt;nvrtc.h&gt;
#include &lt;cuda.h&gt;
#include &lt;iostream&gt;

#define NUM_THREADS 128
#define NUM_BLOCKS 32
#define NVRTC_SAFE_CALL(x)                                        \
  do {                                                            \
    nvrtcResult result = x;                                       \
    if (result != NVRTC_SUCCESS) {                                \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; nvrtcGetErrorString(result) &lt;&lt; '\n';           \
      exit(1);                                                    \
    }                                                             \
  } while(0)
#define CUDA_SAFE_CALL(x)                                         \
  do {                                                            \
    CUresult result = x;                                          \
    if (result != CUDA_SUCCESS) {                                 \
      const char *msg;                                            \
      cuGetErrorName(result, &amp;msg);                               \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; msg &lt;&lt; '\n';                                   \
      exit(1);                                                    \
    }                                                             \
  } while(0)

const char *saxpy = "                                           \n\
extern \"C\" __global__                                         \n\
void saxpy(float a, float *x, float *y, float *out, size_t n)   \n\
{                                                               \n\
  size_t tid = blockIdx.x * blockDim.x + threadIdx.x;           \n\
  if (tid &lt; n) {                                                \n\
    out[tid] = a * x[tid] + y[tid];                             \n\
  }                                                             \n\
}                                                               \n";

int main()
{
  // Create an instance of nvrtcProgram with the SAXPY code string.
  nvrtcProgram prog;</pre><p class="p"></p><pre xml:space="preserve">  NVRTC_SAFE_CALL(
    nvrtcCreateProgram(&amp;prog,         // prog
                       saxpy,         // buffer
                       "saxpy.cu",    // name
                       0,             // numHeaders
                       NULL,          // headers
                       NULL));        // includeNames
  // Compile the program for compute_30 with fmad disabled.
  const char *opts[] = {"--gpu-architecture=compute_30",
                        "--fmad=false"};
  nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
                                                  2,     // numOptions
                                                  opts); // options
  // Obtain compilation log from the program.
  size_t logSize;
  NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &amp;logSize));
  char *log = new char[logSize];
  NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
  std::cout &lt;&lt; log &lt;&lt; '\n';
  delete[] log;
  if (compileResult != NVRTC_SUCCESS) {
    exit(1);
  }
  // Obtain PTX from the program.
  size_t ptxSize;
  NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &amp;ptxSize));
  char *ptx = new char[ptxSize];
  NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx));
  // Destroy the program.
  NVRTC_SAFE_CALL(nvrtcDestroyProgram(&amp;prog));
  // Load the generated PTX and get a handle to the SAXPY kernel.
  CUdevice cuDevice;
  CUcontext context;
  CUmodule module;
  CUfunction kernel;
  CUDA_SAFE_CALL(cuInit(0));
  CUDA_SAFE_CALL(cuDeviceGet(&amp;cuDevice, 0));
  CUDA_SAFE_CALL(cuCtxCreate(&amp;context, 0, cuDevice));
  CUDA_SAFE_CALL(cuModuleLoadDataEx(&amp;module, ptx, 0, 0, 0));
  CUDA_SAFE_CALL(cuModuleGetFunction(&amp;kernel, module, "saxpy"));
  // Generate input for execution, and create output buffers.
  size_t n = NUM_THREADS * NUM_BLOCKS;
  size_t bufferSize = n * sizeof(float);
  float a = 5.1f;
  float *hX = new float[n], *hY = new float[n], *hOut = new float[n];
  for (size_t i = 0; i &lt; n; ++i) {
    hX[i] = static_cast&lt;float&gt;(i);
    hY[i] = static_cast&lt;float&gt;(i * 2);
  }
  CUdeviceptr dX, dY, dOut;
  CUDA_SAFE_CALL(cuMemAlloc(&amp;dX, bufferSize));
  CUDA_SAFE_CALL(cuMemAlloc(&amp;dY, bufferSize));
  CUDA_SAFE_CALL(cuMemAlloc(&amp;dOut, bufferSize));
  CUDA_SAFE_CALL(cuMemcpyHtoD(dX, hX, bufferSize));
  CUDA_SAFE_CALL(cuMemcpyHtoD(dY, hY, bufferSize));
  // Execute SAXPY.
  void *args[] = { &amp;a, &amp;dX, &amp;dY, &amp;dOut, &amp;n };
  CUDA_SAFE_CALL(
    cuLaunchKernel(kernel,
                   NUM_BLOCKS, 1, 1,    // grid dim
                   NUM_THREADS, 1, 1,   // block dim
                   0, NULL,             // shared mem and stream
                   args, 0));           // arguments
  CUDA_SAFE_CALL(cuCtxSynchronize());
  // Retrieve and print output.
  CUDA_SAFE_CALL(cuMemcpyDtoH(hOut, dOut, bufferSize));</pre><p class="p"></p><pre xml:space="preserve">  for (size_t i = 0; i &lt; n; ++i) {
    std::cout &lt;&lt; a &lt;&lt; " * " &lt;&lt; hX[i] &lt;&lt; " + " &lt;&lt; hY[i]
              &lt;&lt; " = " &lt;&lt; hOut[i] &lt;&lt; '\n';
  }
  // Release resources.
  CUDA_SAFE_CALL(cuMemFree(dX));
  CUDA_SAFE_CALL(cuMemFree(dY));
  CUDA_SAFE_CALL(cuMemFree(dOut));
  CUDA_SAFE_CALL(cuModuleUnload(module));
  CUDA_SAFE_CALL(cuCtxDestroy(context));
  delete[] hX;
  delete[] hY;
  delete[] hOut;
  return 0;
}</pre></div>
                  </div>
                  <div class="topic concept nested1" id="build-instruction"><a name="build-instruction" shape="rect">
                        <!-- --></a><h3 class="topictitle3">A.2.&nbsp;Build Instruction</h3>
                     <div class="body conbody">
                        <div class="p">
                           Assuming the environment variable <samp class="ph codeph">CUDA_PATH</samp> points to
                           the CUDA Toolkit installation directory, build this example as follows:
                           
                           <ul class="ul">
                              <li class="li">
                                 Windows:
                                 <pre class="pre screen" xml:space="preserve">cl.exe saxpy.cpp /Fesaxpy ^
  /I "%CUDA_PATH%"\include ^
  "%CUDA_PATH%"\lib\x64\nvrtc.lib "%CUDA_PATH%"\lib\x64\cuda.lib</pre></li>
                              <li class="li">
                                 Linux:
                                 <pre class="pre screen" xml:space="preserve">g++ saxpy.cpp -o saxpy \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib64 \
  -lnvrtc -lcuda \
  -Wl,-rpath,$CUDA_PATH/lib64</pre></li>
                              <li class="li">
                                 Mac OS X:
                                 <pre class="pre screen" xml:space="preserve">clang++ saxpy.cpp -o saxpy \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib \
  -lnvrtc -framework CUDA \
  -Wl,-rpath,$CUDA_PATH/lib</pre></li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="example-lowered-name"><a name="example-lowered-name" shape="rect">
                     <!-- --></a><h2 class="topictitle2">B.&nbsp;Example: Using Lowered Name</h2>
                  <div class="topic concept nested1" id="code-lowered-name-cpp"><a name="code-lowered-name-cpp" shape="rect">
                        <!-- --></a><h3 class="topictitle3">B.1.&nbsp;Code (lowered-name.cpp)</h3>
                     <div class="body conbody"><pre xml:space="preserve">
#include &lt;nvrtc.h&gt;
#include &lt;cuda.h&gt;
#include &lt;iostream&gt;
#include &lt;vector&gt;
#include &lt;string&gt;

#define NVRTC_SAFE_CALL(x)                                        \
  do {                                                            \
    nvrtcResult result = x;                                       \
    if (result != NVRTC_SUCCESS) {                                \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; nvrtcGetErrorString(result) &lt;&lt; '\n';           \
      exit(1);                                                    \
    }                                                             \
  } while(0)
#define CUDA_SAFE_CALL(x)                                         \
  do {                                                            \
    CUresult result = x;                                          \
    if (result != CUDA_SUCCESS) {                                 \
      const char *msg;                                            \
      cuGetErrorName(result, &amp;msg);                               \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; msg &lt;&lt; '\n';                                   \
      exit(1);                                                    \
    }                                                             \
  } while(0)

const char *gpu_program = "                                     \n\
__device__ int V1; // set from host code                        \n\
static __global__ void f1(int *result) { *result = V1 + 10; }   \n\
namespace N1 {                                                  \n\
  namespace N2 {                                                \n\
     __constant__ int V2; // set from host code                 \n\
    __global__ void f2(int *result) { *result = V2 + 20; }      \n\
  }                                                             \n\
}                                                               \n\
template&lt;typename T&gt;                                            \n\
__global__ void f3(int *result) { *result = sizeof(T); }        \n\
                                                                \n";

</pre><p class="p"></p><pre xml:space="preserve">
int main()
{
  // Create an instance of nvrtcProgram
  nvrtcProgram prog;
  NVRTC_SAFE_CALL(nvrtcCreateProgram(&amp;prog,         // prog
                                     gpu_program,   // buffer
                                     "prog.cu",     // name
                                     0,             // numHeaders
                                     NULL,          // headers
                                     NULL));        // includeNames

  // add all name expressions for kernels
  std::vector&lt;std::string&gt; kernel_name_vec;
  std::vector&lt;std::string&gt; variable_name_vec;
  std::vector&lt;int&gt; variable_initial_value;
  
  std::vector&lt;int&gt; expected_result;
  
  // note the name expressions are parsed as constant expressions
  kernel_name_vec.push_back("&amp;f1");
  expected_result.push_back(10 + 100);
  
  kernel_name_vec.push_back("N1::N2::f2");
  expected_result.push_back(20 + 200);
    
  kernel_name_vec.push_back("f3&lt;int&gt;");
  expected_result.push_back(sizeof(int));
  
  kernel_name_vec.push_back("f3&lt;double&gt;");
  expected_result.push_back(sizeof(double));
  
  // add kernel name expressions to NVRTC. Note this must be done before
  // the program is compiled.
  for (size_t i = 0; i &lt; kernel_name_vec.size(); ++i)
    NVRTC_SAFE_CALL(nvrtcAddNameExpression(prog, kernel_name_vec[i].c_str()));
  
  // add expressions for  __device__ / __constant__ variables to NVRTC
  variable_name_vec.push_back("&amp;V1");
  variable_initial_value.push_back(100);
  
  variable_name_vec.push_back("&amp;N1::N2::V2");
  variable_initial_value.push_back(200);
  
  for (size_t i = 0; i &lt; variable_name_vec.size(); ++i)
    NVRTC_SAFE_CALL(nvrtcAddNameExpression(prog, variable_name_vec[i].c_str()));
    
  nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
                                                  0,     // numOptions
                                                  NULL); // options
  // Obtain compilation log from the program.
  size_t logSize;
  NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &amp;logSize));
  char *log = new char[logSize];
  NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
  std::cout &lt;&lt; log &lt;&lt; '\n';
  delete[] log;
  if (compileResult != NVRTC_SUCCESS) {
    exit(1);
  }
  // Obtain PTX from the program.
  size_t ptxSize;
  NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &amp;ptxSize));
  char *ptx = new char[ptxSize];
  NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx));
</pre><p class="p"></p><pre xml:space="preserve">
  // Load the generated PTX
  CUdevice cuDevice;
  CUcontext context;
  CUmodule module;
  
  CUDA_SAFE_CALL(cuInit(0));
  CUDA_SAFE_CALL(cuDeviceGet(&amp;cuDevice, 0));
  CUDA_SAFE_CALL(cuCtxCreate(&amp;context, 0, cuDevice));
  CUDA_SAFE_CALL(cuModuleLoadDataEx(&amp;module, ptx, 0, 0, 0));
  
  CUdeviceptr dResult;
  int hResult = 0;
  CUDA_SAFE_CALL(cuMemAlloc(&amp;dResult, sizeof(hResult)));
  CUDA_SAFE_CALL(cuMemcpyHtoD(dResult, &amp;hResult, sizeof(hResult)));
  
  // for each of the __device__/__constant__ variable address
  // expressions provided to NVRTC, extract the lowered name for the
  // corresponding variable, and set its value
  for (size_t i = 0; i &lt; variable_name_vec.size(); ++i) {
    const char *name;
    
    // note: this call must be made after NVRTC program has been 
    // compiled and before it has been destroyed.
    NVRTC_SAFE_CALL(nvrtcGetLoweredName(
                          prog, 
			  variable_name_vec[i].c_str(), // name expression
			  &amp;name                         // lowered name
                                        ));
    int initial_value = variable_initial_value[i];
    
    // get pointer to variable using lowered name, and set its
    // initial value
    CUdeviceptr variable_addr;
    CUDA_SAFE_CALL(cuModuleGetGlobal(&amp;variable_addr, NULL, module, name));
    CUDA_SAFE_CALL(cuMemcpyHtoD(variable_addr, &amp;initial_value, sizeof(initial_value)));
  }
  
  
  // for each of the kernel name expressions previously provided to NVRTC,
  // extract the lowered name for corresponding __global__ function,
  // and launch it.
  
  for (size_t i = 0; i &lt; kernel_name_vec.size(); ++i) {
    const char *name;
    
    // note: this call must be made after NVRTC program has been 
    // compiled and before it has been destroyed.
    NVRTC_SAFE_CALL(nvrtcGetLoweredName(
                          prog, 
			  kernel_name_vec[i].c_str(), // name expression
			  &amp;name                // lowered name
                                        ));
    
    // get pointer to kernel from loaded PTX
    CUfunction kernel;
    CUDA_SAFE_CALL(cuModuleGetFunction(&amp;kernel, module, name));
</pre><p class="p"></p><pre xml:space="preserve">    
    // launch the kernel
    std::cout &lt;&lt; "\nlaunching " &lt;&lt; name &lt;&lt; " ("
	      &lt;&lt; kernel_name_vec[i] &lt;&lt; ")" &lt;&lt; std::endl;
    
    void *args[] = { &amp;dResult };
    CUDA_SAFE_CALL(
      cuLaunchKernel(kernel,
		     1, 1, 1,             // grid dim
		     1, 1, 1,             // block dim
		     0, NULL,             // shared mem and stream
		     args, 0));           // arguments
    CUDA_SAFE_CALL(cuCtxSynchronize());
    
    // Retrieve the result
    CUDA_SAFE_CALL(cuMemcpyDtoH(&amp;hResult, dResult, sizeof(hResult)));
    
    // check against expected value
    if (expected_result[i] != hResult) {
      std::cout &lt;&lt; "\n Error: expected result = " &lt;&lt; expected_result[i]
                &lt;&lt; " , actual result = " &lt;&lt; hResult &lt;&lt; std::endl;
      exit(1);
    }
  }  // for
    
  // Release resources.
  CUDA_SAFE_CALL(cuMemFree(dResult));
  CUDA_SAFE_CALL(cuModuleUnload(module));
  CUDA_SAFE_CALL(cuCtxDestroy(context));
  
  // Destroy the program. 
  NVRTC_SAFE_CALL(nvrtcDestroyProgram(&amp;prog));

  return 0;
}</pre></div>
                  </div>
                  <div class="topic concept nested1" id="lowered-name-build-instruction"><a name="lowered-name-build-instruction" shape="rect">
                        <!-- --></a><h3 class="topictitle3">B.2.&nbsp;Build Instruction</h3>
                     <div class="body conbody">
                        <div class="p">
                           Assuming the environment variable <samp class="ph codeph">CUDA_PATH</samp> points to
                           the CUDA Toolkit installation directory, build this example as follows:
                           
                           <ul class="ul">
                              <li class="li">
                                 Windows:
                                 <pre class="pre screen" xml:space="preserve">cl.exe lowered-name.cpp /Felowered-name ^
  /I "%CUDA_PATH%"\include ^
  "%CUDA_PATH%"\lib\x64\nvrtc.lib "%CUDA_PATH%"\lib\x64\cuda.lib</pre></li>
                              <li class="li">
                                 Linux:
                                 <pre class="pre screen" xml:space="preserve">g++ lowered-name.cpp -o lowered-name \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib64 \
  -lnvrtc -lcuda \
  -Wl,-rpath,$CUDA_PATH/lib64</pre></li>
                              <li class="li">
                                 Mac OS X:
                                 <pre class="pre screen" xml:space="preserve">clang++ lowered-name.cpp -o lowered-name \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib \
  -lnvrtc -framework CUDA \
  -Wl,-rpath,$CUDA_PATH/lib</pre></li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="example-host-type-name"><a name="example-host-type-name" shape="rect">
                     <!-- --></a><h2 class="topictitle2">C.&nbsp;Example: Using nvrtcGetTypeName</h2>
                  <div class="topic concept nested1" id="code-host-type-name-cpp"><a name="code-host-type-name-cpp" shape="rect">
                        <!-- --></a><h3 class="topictitle3">C.1.&nbsp;Code (host-type-name.cpp)</h3>
                     <div class="body conbody"><pre xml:space="preserve">
#include &lt;nvrtc.h&gt;
#include &lt;cuda.h&gt;
#include &lt;iostream&gt;
#include &lt;vector&gt;
#include &lt;string&gt;

#define NVRTC_SAFE_CALL(x)                                        \
  do {                                                            \
    nvrtcResult result = x;                                       \
    if (result != NVRTC_SUCCESS) {                                \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; nvrtcGetErrorString(result) &lt;&lt; '\n';           \
      exit(1);                                                    \
    }                                                             \
  } while(0)
#define CUDA_SAFE_CALL(x)                                         \
  do {                                                            \
    CUresult result = x;                                          \
    if (result != CUDA_SUCCESS) {                                 \
      const char *msg;                                            \
      cuGetErrorName(result, &amp;msg);                               \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; msg &lt;&lt; '\n';                                   \
      exit(1);                                                    \
    }                                                             \
  } while(0)

const char *gpu_program = "                                     \n\
namespace N1 { struct S1_t { int i; double d; }; }              \n\
template&lt;typename T&gt;                                            \n\
__global__ void f3(int *result) { *result = sizeof(T); }        \n\
                                                                \n";


// note: this structure is also defined in GPU code string. Should ideally
// be in a header file included by both GPU code string and by CPU code.
namespace N1 { struct S1_t { int i; double d; }; };</pre><p class="p"></p><pre xml:space="preserve">template &lt;typename T&gt;
std::string getKernelNameForType(void)
{
   // Look up the source level name string for the type "T" using
   // nvrtcGetTypeName() and use it to create the kernel name 
   std::string type_name;
   NVRTC_SAFE_CALL(nvrtcGetTypeName&lt;T&gt;(&amp;type_name));
   return std::string("f3&lt;") + type_name + "&gt;";
}

int main()
{
  // Create an instance of nvrtcProgram
  nvrtcProgram prog;
  NVRTC_SAFE_CALL(
    nvrtcCreateProgram(&amp;prog,         // prog
                       gpu_program,   // buffer
                       "gpu_program.cu",    // name
                       0,             // numHeaders
                       NULL,          // headers
                       NULL));        // includeNames

  // add all name expressions for kernels
  std::vector&lt;std::string&gt; name_vec;
  std::vector&lt;int&gt; expected_result;
  
  // note the name expressions are parsed as constant expressions
  name_vec.push_back(getKernelNameForType&lt;int&gt;());
  expected_result.push_back(sizeof(int));
    
  name_vec.push_back(getKernelNameForType&lt;double&gt;());
  expected_result.push_back(sizeof(double));
  
  name_vec.push_back(getKernelNameForType&lt;N1::S1_t&gt;());
  expected_result.push_back(sizeof(N1::S1_t));
  
  
  // add name expressions to NVRTC. Note this must be done before
  // the program is compiled.
  for (size_t i = 0; i &lt; name_vec.size(); ++i)
    NVRTC_SAFE_CALL(nvrtcAddNameExpression(prog, name_vec[i].c_str()));
  
  nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
                                                  0,     // numOptions
                                                  NULL); // options
  // Obtain compilation log from the program.
  size_t logSize;
  NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &amp;logSize));
  char *log = new char[logSize];
  NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
  std::cout &lt;&lt; log &lt;&lt; '\n';
  delete[] log;
  if (compileResult != NVRTC_SUCCESS) {
    exit(1);
  }
  // Obtain PTX from the program.
  size_t ptxSize;
  NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &amp;ptxSize));
  char *ptx = new char[ptxSize];
  NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx));
  </pre><p class="p"></p><pre xml:space="preserve">

  // Load the generated PTX
  CUdevice cuDevice;
  CUcontext context;
  CUmodule module;
  
  CUDA_SAFE_CALL(cuInit(0));
  CUDA_SAFE_CALL(cuDeviceGet(&amp;cuDevice, 0));
  CUDA_SAFE_CALL(cuCtxCreate(&amp;context, 0, cuDevice));
  CUDA_SAFE_CALL(cuModuleLoadDataEx(&amp;module, ptx, 0, 0, 0));
  
  CUdeviceptr dResult;
  int hResult = 0;
  CUDA_SAFE_CALL(cuMemAlloc(&amp;dResult, sizeof(hResult)));
  CUDA_SAFE_CALL(cuMemcpyHtoD(dResult, &amp;hResult, sizeof(hResult)));
  
  // for each of the name expressions previously provided to NVRTC,
  // extract the lowered name for corresponding __global__ function,
  // and launch it.
  
  for (size_t i = 0; i &lt; name_vec.size(); ++i) {
    const char *name;
    
    // note: this call must be made after NVRTC program has been 
    // compiled and before it has been destroyed.
    NVRTC_SAFE_CALL(nvrtcGetLoweredName(
                          prog, 
			  name_vec[i].c_str(), // name expression
			  &amp;name                // lowered name
                                        ));
    
    // get pointer to kernel from loaded PTX
    CUfunction kernel;
    CUDA_SAFE_CALL(cuModuleGetFunction(&amp;kernel, module, name));
    
    // launch the kernel
    std::cout &lt;&lt; "\nlaunching " &lt;&lt; name &lt;&lt; " ("
	      &lt;&lt; name_vec[i] &lt;&lt; ")" &lt;&lt; std::endl;
    
    void *args[] = { &amp;dResult };
    CUDA_SAFE_CALL(
      cuLaunchKernel(kernel,
		     1, 1, 1,             // grid dim
		     1, 1, 1,             // block dim
		     0, NULL,             // shared mem and stream
		     args, 0));           // arguments
    CUDA_SAFE_CALL(cuCtxSynchronize());
    
    // Retrieve the result
    CUDA_SAFE_CALL(cuMemcpyDtoH(&amp;hResult, dResult, sizeof(hResult)));
    
    // check against expected value
    if (expected_result[i] != hResult) {
      std::cout &lt;&lt; "\n Error: expected result = " &lt;&lt; expected_result[i]
		&lt;&lt; " , actual result = " &lt;&lt; hResult &lt;&lt; std::endl;
      exit(1);
    }
  }  // for
    
  // Release resources.
  CUDA_SAFE_CALL(cuMemFree(dResult));
  CUDA_SAFE_CALL(cuModuleUnload(module));
  CUDA_SAFE_CALL(cuCtxDestroy(context));
  
  // Destroy the program. 
  NVRTC_SAFE_CALL(nvrtcDestroyProgram(&amp;prog));

  return 0;
}</pre></div>
                  </div>
                  <div class="topic concept nested1" id="host-type-name-build-instruction"><a name="host-type-name-build-instruction" shape="rect">
                        <!-- --></a><h3 class="topictitle3">C.2.&nbsp;Build Instruction</h3>
                     <div class="body conbody">
                        <div class="p">
                           Assuming the environment variable <samp class="ph codeph">CUDA_PATH</samp> points to
                           the CUDA Toolkit installation directory, build this example as follows:
                           
                           <ul class="ul">
                              <li class="li">
                                 Windows:
                                 <pre class="pre screen" xml:space="preserve">cl.exe -DNVRTC_GET_TYPE_NAME=1 host-type-name.cpp /Fehost-type-name ^
  /I "%CUDA_PATH%"\include ^
  "%CUDA_PATH%"\lib\x64\nvrtc.lib "%CUDA_PATH%"\lib\x64\cuda.lib DbgHelp.lib</pre></li>
                              <li class="li">
                                 Linux:
                                 <pre class="pre screen" xml:space="preserve">g++ -DNVRTC_GET_TYPE_NAME=1 host-type-name.cpp -o host-type-name \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib64 \
  -lnvrtc -lcuda \
  -Wl,-rpath,$CUDA_PATH/lib64</pre></li>
                              <li class="li">
                                 Mac OS X:
                                 <pre class="pre screen" xml:space="preserve">clang++ -DNVRTC_GET_TYPE_NAME=1 host-type-name.cpp -o host-type-name \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib \
  -lnvrtc -framework CUDA \
  -Wl,-rpath,$CUDA_PATH/lib</pre></li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="example-dynamic-parallelism"><a name="example-dynamic-parallelism" shape="rect">
                     <!-- --></a><h2 class="topictitle2">D.&nbsp;Example: Dynamic Parallelism</h2>
                  <div class="topic concept nested1" id="code-dynamic-parallelism-cpp"><a name="code-dynamic-parallelism-cpp" shape="rect">
                        <!-- --></a><h3 class="topictitle3">D.1.&nbsp;Code (dynamic-parallelism.cpp)</h3>
                     <div class="body conbody"><pre xml:space="preserve">#include &lt;nvrtc.h&gt;
#include &lt;cuda.h&gt;
#include &lt;iostream&gt;

#define NVRTC_SAFE_CALL(x)                                        \
  do {                                                            \
    nvrtcResult result = x;                                       \
    if (result != NVRTC_SUCCESS) {                                \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; nvrtcGetErrorString(result) &lt;&lt; '\n';           \
      exit(1);                                                    \
    }                                                             \
  } while(0)
#define CUDA_SAFE_CALL(x)                                         \
  do {                                                            \
    CUresult result = x;                                          \
    if (result != CUDA_SUCCESS) {                                 \
      const char *msg;                                            \
      cuGetErrorName(result, &amp;msg);                               \
      std::cerr &lt;&lt; "\nerror: " #x " failed with error "           \
                &lt;&lt; msg &lt;&lt; '\n';                                   \
      exit(1);                                                    \
    }                                                             \
  } while(0)

// CUDA C++ source that is compiled at run time by NVRTC.
// child() stores each thread's global index into out[tid] for every tid
// below n. parent() launches child() from the device with the requested
// number of blocks and threads, then waits for it with
// cudaDeviceSynchronize().
const char *dynamic_parallelism = "                             \n\
extern \"C\" __global__                                         \n\
void child(float *out, size_t n)                                \n\
{                                                               \n\
  size_t tid = blockIdx.x * blockDim.x + threadIdx.x;           \n\
  if (tid &lt; n) {                                                \n\
    out[tid] = tid;                                             \n\
  }                                                             \n\
}                                                               \n\
                                                                \n\
extern \"C\" __global__                                         \n\
void parent(float *out, size_t n,                               \n\
              size_t numBlocks, size_t numThreads)              \n\
{                                                               \n\
  child&lt;&lt;&lt;numBlocks, numThreads&gt;&gt;&gt;(out, n);                     \n\
  cudaDeviceSynchronize();                                      \n\
}                                                               \n";</pre><p class="p"></p><pre xml:space="preserve">// Compiles the kernels above to PTX with NVRTC, links the PTX against the
// cudadevrt library whose path is given as argv[1], runs the parent kernel,
// and prints every element of the output buffer on its own line.
// Exits with status 1 when the library path is missing, when compilation
// fails, or when any NVRTC/CUDA call wrapped in a *_SAFE_CALL macro fails.
int main(int argc, char *argv[])
{
  if (argc &lt; 2) {
    std::cout &lt;&lt; "Usage: dynamic-parallelism &lt;path to cudadevrt library&gt;\n\n"
              &lt;&lt; "&lt;path to cudadevrt library&gt; must include the cudadevrt\n"
              &lt;&lt; "library name itself, e.g., Z:\\path\\to\\cudadevrt.lib on \n"
              &lt;&lt; "Windows and /path/to/libcudadevrt.a on Linux and Mac OS X.\n";
    exit(1);
  }
  size_t numBlocks = 32;
  size_t numThreads = 128;
  // Create an instance of nvrtcProgram with the code string.
  nvrtcProgram prog;
  NVRTC_SAFE_CALL(
    nvrtcCreateProgram(&amp;prog,                       // prog
                       dynamic_parallelism,         // buffer
                       "dynamic_parallelism.cu",    // name
                       0,                           // numHeaders
                       NULL,                        // headers
                       NULL));                      // includeNames
  // Compile the program for compute_35 with rdc enabled.
  const char *opts[] = {"--gpu-architecture=compute_35",
                        "--relocatable-device-code=true"};
  // The result is checked only after the log has been printed, so that
  // compiler diagnostics are shown even when compilation fails.
  nvrtcResult compileResult = nvrtcCompileProgram(prog,  // prog
                                                  2,     // numOptions
                                                  opts); // options
  // Obtain compilation log from the program.
  size_t logSize;
  NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &amp;logSize));
  char *log = new char[logSize];
  NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
  std::cout &lt;&lt; log &lt;&lt; '\n';
  delete[] log;
  if (compileResult != NVRTC_SUCCESS) {
    exit(1);
  }
  // Obtain PTX from the program.
  size_t ptxSize;
  NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &amp;ptxSize));
  char *ptx = new char[ptxSize];
  NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx));
  // Destroy the program.
  NVRTC_SAFE_CALL(nvrtcDestroyProgram(&amp;prog));
  // Link the generated PTX with the cudadevrt library given on the command
  // line, load the linked image, and get a handle to the parent kernel.
  CUdevice cuDevice;
  CUcontext context;
  CUlinkState linkState;
  CUmodule module;
  CUfunction kernel;
  CUDA_SAFE_CALL(cuInit(0));
  CUDA_SAFE_CALL(cuDeviceGet(&amp;cuDevice, 0));
  CUDA_SAFE_CALL(cuCtxCreate(&amp;context, 0, cuDevice));
  CUDA_SAFE_CALL(cuLinkCreate(0, 0, 0, &amp;linkState));
  CUDA_SAFE_CALL(cuLinkAddFile(linkState, CU_JIT_INPUT_LIBRARY, argv[1],
                               0, 0, 0));
  CUDA_SAFE_CALL(cuLinkAddData(linkState, CU_JIT_INPUT_PTX,
                               (void *)ptx, ptxSize, "dynamic_parallelism.ptx",
                               0, 0, 0));
  size_t cubinSize;
  void *cubin;
  CUDA_SAFE_CALL(cuLinkComplete(linkState, &amp;cubin, &amp;cubinSize));
  CUDA_SAFE_CALL(cuModuleLoadData(&amp;module, cubin));
  CUDA_SAFE_CALL(cuModuleGetFunction(&amp;kernel, module, "parent"));</pre><p class="p"></p><pre xml:space="preserve">  // Allocate the host and device output buffers (this example has no input).
  size_t n = numBlocks * numThreads;
  size_t bufferSize = n * sizeof(float);
  float *hOut = new float[n];
  CUdeviceptr dOut;
  CUDA_SAFE_CALL(cuMemAlloc(&amp;dOut, bufferSize));
  // Execute parent kernel with a single thread; the parent launches the
  // child grid of numBlocks x numThreads threads from the device.
  void *args[] = { &amp;dOut, &amp;n, &amp;numBlocks, &amp;numThreads };
  CUDA_SAFE_CALL(
    cuLaunchKernel(kernel,
                   1, 1, 1,    // grid dim
                   1, 1, 1,    // block dim
                   0, NULL,    // shared mem and stream
                   args, 0));  // arguments
  CUDA_SAFE_CALL(cuCtxSynchronize());
  // Retrieve and print output.
  CUDA_SAFE_CALL(cuMemcpyDtoH(hOut, dOut, bufferSize));

  for (size_t i = 0; i &lt; n; ++i) {
    std::cout &lt;&lt; hOut[i] &lt;&lt; '\n';
  }
  // Release resources: device memory, module, linker state, context, and
  // the host-side output and PTX buffers.
  CUDA_SAFE_CALL(cuMemFree(dOut));
  CUDA_SAFE_CALL(cuModuleUnload(module));
  CUDA_SAFE_CALL(cuLinkDestroy(linkState));
  CUDA_SAFE_CALL(cuCtxDestroy(context));
  delete[] hOut;
  delete[] ptx;
  return 0;
}</pre><p class="p"></p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="dynamic-parallelism-build-instruction"><a name="dynamic-parallelism-build-instruction" shape="rect">
                        <!-- --></a><h3 class="topictitle3">D.2.&nbsp;Build Instruction</h3>
                     <div class="body conbody">
                        <div class="p">
                           Assuming the environment variable <samp class="ph codeph">CUDA_PATH</samp> points to
                           the CUDA Toolkit installation directory, build this example as:
                           
                           <ul class="ul">
                              <li class="li">
                                 Windows:
                                 <pre class="pre screen" xml:space="preserve">cl.exe dynamic-parallelism.cpp /Fedynamic-parallelism ^
  /I "%CUDA_PATH%\include" ^
  "%CUDA_PATH%"\lib\x64\nvrtc.lib "%CUDA_PATH%"\lib\x64\cuda.lib</pre></li>
                              <li class="li">
                                 Linux:
                                 <pre class="pre screen" xml:space="preserve">g++ dynamic-parallelism.cpp -o dynamic-parallelism \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib64 \
  -lnvrtc -lcuda \
  -Wl,-rpath,$CUDA_PATH/lib64</pre></li>
                              <li class="li">
                                 Mac OS X:
                                 <pre class="pre screen" xml:space="preserve">clang++ dynamic-parallelism.cpp -o dynamic-parallelism \
  -I $CUDA_PATH/include \
  -L $CUDA_PATH/lib \
  -lnvrtc -framework CUDA \
  -Wl,-rpath,$CUDA_PATH/lib</pre></li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="topictitle2">Notices</h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="topictitle3"></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="topictitle3"></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="topictitle3"></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2014</span>-<span class="ph">2019</span> NVIDIA
                              Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <!-- The ShareThis loader is fetched over HTTPS so that it is not blocked as mixed content on HTTPS pages.
           The inline calls below are guarded because the third-party globals are undefined when their scripts fail to load. -->
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="https://ws.sharethis.com/button/buttons.js"></script><script type="text/javascript">if (window.stLight) { stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false}); }</script><script type="text/javascript">if (window._satellite) { _satellite.pageBottom(); }</script></body>
</html>