Sophie

Sophie

distrib > Mageia > 7 > x86_64 > media > nonfree-updates > by-pkgid > b86a85131cc739c1c53d0b55840a4328 > files > 3678

nvidia-cuda-toolkit-devel-10.1.168-1.2.mga7.nonfree.x86_64.rpm

<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="PTX Writer's Guide to Interoperability"></meta>
      <meta name="abstract" content="The guide to writing ABI-compliant PTX."></meta>
      <meta name="description" content="The guide to writing ABI-compliant PTX."></meta>
      <meta name="DC.Coverage" content="Programming Guides"></meta>
      <meta name="DC.subject" content="CUDA PTX, CUDA PTX interoperability, CUDA PTX ABI, CUDA PTX calling sequence, CUDA ABI"></meta>
      <meta name="keywords" content="CUDA PTX, CUDA PTX interoperability, CUDA PTX ABI, CUDA PTX calling sequence, CUDA ABI"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>PTX Interoperability :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit 
                  
                  
                  v10.1.168</a></div>
            <div class="category"><a href="index.html" title="PTX Interoperability">PTX Interoperability</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#data-representation">2.&nbsp;Data Representation</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#fundamental-types">2.1.&nbsp;Fundamental Types</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#aggregates-unions">2.2.&nbsp;Aggregates and Unions</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#bit-fields">2.3.&nbsp;Bit Fields</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#textures-surfaces-samplers">2.4.&nbsp;Texture, Sampler, and Surface Types</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#function-calling-sequence">3.&nbsp;Function Calling Sequence</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#registers">3.1.&nbsp;Registers</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#stack-frame">3.2.&nbsp;Stack Frame</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#parameter-passing">3.3.&nbsp;Parameter Passing</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#system-calls">4.&nbsp;System Calls</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#debug-information">5.&nbsp;Debug Information</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#generation-of-debug">5.1.&nbsp;Generation of Debug Information</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#cuda-specific-dwarf">5.2.&nbsp;CUDA-Specific DWARF Definitions</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#example">6.&nbsp;Example</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#cxx">7.&nbsp;C++</a></div>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">PTX Interoperability
                  (<a href="../../pdf/PTX_Writers_Guide_To_Interoperability.pdf">PDF</a>)
                  -
                   
                  
                  
                  v10.1.168
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated April 24, 2019
                  -
                  <a href="mailto:CUDAIssues@nvidia.com?subject=CUDA Toolkit Documentation Feedback: PTX Interoperability">Send Feedback</a></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#abstract" name="abstract" shape="rect">PTX Writer's Guide to Interoperability</a></h2>
                  <div class="body conbody">
                     <p class="shortdesc">The guide to writing ABI-compliant PTX. </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#introduction" name="introduction" shape="rect">1.&nbsp;Introduction</a></h2>
                  <div class="body conbody">
                     <p class="p">This document defines the Application Binary Interface (ABI) for the 
                        CUDA<sup>®</sup> architecture when generating <dfn class="term">PTX</dfn>.
                        By following the ABI, external developers can generate compliant PTX 
                        code that can be linked with other code.
                     </p>
                     <p class="p">PTX is a low-level parallel-thread-execution virtual machine and ISA 
                        (Instruction Set Architecture). 
                        PTX can be output from multiple tools or written directly by developers. 
                        PTX is meant to be GPU-architecture independent, so that the same code 
                        can be reused for different GPU architectures. 
                        For more information on PTX, refer to the latest version of the
                        <a class="xref" href="http://docs.nvidia.com/cuda/parallel-thread-execution/index.html" target="_blank" shape="rect"><em class="ph i">PTX ISA reference document</em></a>.
                        
                     </p>
                     <p class="p">There are multiple CUDA architecture families, each with their own ISA;
                        e.g. SM 2.x is the Fermi family, SM 3.x is the Kepler family. 
                        This document describes the high-level ABI for all architectures.
                        Programs conforming to an ABI are expected to be executed on the 
                        appropriate architecture GPU, and can assume that instructions from 
                        that ISA are available.
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="data-representation"><a name="data-representation" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#data-representation" name="data-representation" shape="rect">2.&nbsp;Data Representation</a></h2>
                  <div class="topic concept nested1" id="fundamental-types"><a name="fundamental-types" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#fundamental-types" name="fundamental-types" shape="rect">2.1.&nbsp;Fundamental Types</a></h3>
                     <div class="body conbody">
                        <p class="p">The below table shows the native scalar PTX types that are supported. 
                           Any PTX producer must use these sizes and alignments in order for its 
                           PTX to be compatible with PTX generated by other producers. 
                           PTX also supports native vector types, which are discussed in
                           <a class="xref" href="index.html#aggregates-unions" shape="rect">Aggregates and Unions</a>.
                        </p>
                        <p class="p">The sizes of types are defined by the host. 
                           For example, pointer size and long int size are dictated by the hosts ABI.
                           PTX has an .address_size directive that specifies the address size used 
                           throughout the PTX code. 
                           The size of pointers is 32 bits on a 32-bit host 
                           or 64 bits on a 64-bit host. 
                           However, addresses of the local and shared memory spaces 
                           are always 32 bits in size.
                        </p>
                        <p class="p">During separate compilation we store info about the host platform in 
                           each object file. 
                           The linker will fail to link object files generated for incompatible 
                           host platforms.
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="29.411764705882355%" id="d54e122" rowspan="1" colspan="1">PTX Type</th>
                                    <th class="entry" valign="top" width="11.76470588235294%" id="d54e125" rowspan="1" colspan="1">Size (bytes)</th>
                                    <th class="entry" valign="top" width="11.76470588235294%" id="d54e128" rowspan="1" colspan="1">Align (bytes)</th>
                                    <th class="entry" valign="top" width="47.05882352941176%" id="d54e131" rowspan="1" colspan="1">Hardware Representation</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.b8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">untyped byte</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.b16</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">untyped halfword</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.b32</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">untyped word</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.b64</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">untyped doubleword</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.s8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">signed integral byte</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.s16</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">signed integral halfword</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.s32</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">signed integral word</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.s64</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">signed integral doubleword</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.u8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">unsigned integral byte</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.u16</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">unsigned integral halfword</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.u32</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">unsigned integral word</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.u64</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">unsigned integral doubleword</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.f16</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">IEEE half precision</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.f32</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">IEEE single precision</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="29.411764705882355%" headers="d54e122" rowspan="1" colspan="1">.f64</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e125" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="11.76470588235294%" headers="d54e128" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="47.05882352941176%" headers="d54e131" rowspan="1" colspan="1">IEEE double precision</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="aggregates-unions"><a name="aggregates-unions" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#aggregates-unions" name="aggregates-unions" shape="rect">2.2.&nbsp;Aggregates and Unions</a></h3>
                     <div class="body conbody">
                        <p class="p">Beyond the scalar types, PTX also supports native-vector types of 
                           these scalar types, with both its vector syntax and its byte-array syntax.
                           For scalar types with a size no greater than four bytes, 
                           vector types with 1, 2, 3, and 4 elements exist; 
                           for all other types, only 1 and 2 element vector types exist.
                        </p>
                        <p class="p">All aggregates and unions can be supported in PTX 
                           with its byte-array syntax.
                        </p>
                        <div class="p">The following are the size-and-alignment rules for all 
                           aggregates and unions.
                           
                           <ul class="ul">
                              <li class="li">For a non-native-vector type, an entire aggregate or union 
                                 is aligned on the same boundary as its most strictly aligned member.
                                 This rule is not followed if the alignments are defined by the 
                                 input language. For example, in OpenCL built-in vector data types 
                                 have their alignment set to the size of the built-in data type 
                                 in bytes.
                              </li>
                              <li class="li">For a native vector type -- discussed at the start of this section --
                                 the alignment is defined as follows. 
                                 (For the definitions below, the native vector has n elements 
                                 and has an element type t.)
                                 
                                 <ul class="ul">
                                    <li class="li">For a vector with an odd number of elements, 
                                       its alignment is the same as its member:  alignof(t).
                                    </li>
                                    <li class="li">For a vector with an even number of elements, 
                                       its alignment is set to number of elements times the alignment 
                                       of its member:  n*alignof(t).
                                    </li>
                                 </ul>
                              </li>
                              <li class="li">Each member is assigned to the lowest available offset with the 
                                 appropriate alignment. This may require internal padding, 
                                 depending on the previous member.
                              </li>
                              <li class="li">The size of an aggregate or union, if necessary, is increased to 
                                 make it a multiple of the alignment of the aggregate or union. 
                                 This may require tail padding, depending on the last member.
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="bit-fields"><a name="bit-fields" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#bit-fields" name="bit-fields" shape="rect">2.3.&nbsp;Bit Fields</a></h3>
                     <div class="body conbody">
                        <p class="p">C structure and union definitions may have bit fields that define 
                           integral objects with a specified number of bits.
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="34.78260869565217%" id="d54e435" rowspan="1" colspan="1">Bit Field Type</th>
                                    <th class="entry" valign="top" width="34.78260869565217%" id="d54e438" rowspan="1" colspan="1">Width w</th>
                                    <th class="entry" valign="top" width="30.434782608695656%" id="d54e441" rowspan="1" colspan="1">Range</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">signed char</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 8</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">-2<sup class="ph sup">w-1</sup> to 2<sup class="ph sup">w-1</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">unsigned char</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 8</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">0 to 2<sup class="ph sup">w</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">signed short</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 16</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">-2<sup class="ph sup">w-1</sup> to 2<sup class="ph sup">w-1</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">unsigned short</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 16</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">0 to 2<sup class="ph sup">w</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">signed int</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 32</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">-2<sup class="ph sup">w-1</sup> to 2<sup class="ph sup">w-1</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">unsigned int</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 32</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">0 to 2<sup class="ph sup">w</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">signed long long</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 64</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">-2<sup class="ph sup">w-1</sup> to 2<sup class="ph sup">w-1</sup> - 1
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e435" rowspan="1" colspan="1">unsigned long long</td>
                                    <td class="entry" valign="top" width="34.78260869565217%" headers="d54e438" rowspan="1" colspan="1">1 to 64</td>
                                    <td class="entry" valign="top" width="30.434782608695656%" headers="d54e441" rowspan="1" colspan="1">0 to 2<sup class="ph sup">w</sup> - 1
                                    </td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">Current GPUs only support little-endian memory, 
                           so the below assumes little-endian layout.
                        </p>
                        <p class="p">The following are rules that apply to bit fields.</p>
                        <ul class="ul">
                           <li class="li">Plain bit fields (neither signed nor unsigned is specified) 
                              are treated as signed.
                           </li>
                           <li class="li">When no type is provided (e.g., signed : 6 is specified), 
                              the type defaults to int.
                           </li>
                        </ul>
                        <p class="p">Bit fields obey the same size and alignment rules as other structure 
                           and union members, with the following modifications.
                        </p>
                        <ul class="ul">
                           <li class="li">Bit fields are allocated in memory from right to left 
                              (least to more significant) for little endian.
                           </li>
                           <li class="li">A bit field must entirely reside in a storage unit appropriate for 
                              its declared type. A bit field should never cross its unit boundary.
                           </li>
                           <li class="li">Bit fields may share a storage unit with other structure and union 
                              members, including members that are not bit fields, 
                              as long as there is enough space within the storage unit.
                           </li>
                           <li class="li">Unnamed bit fields do not affect the alignment of a structure or 
                              union.
                           </li>
                           <li class="li">Zero-length bit fields force the alignment of the following member 
                              of a structure to the next alignment boundary corresponding to the 
                              bit-field type. An unnamed, zero-length bit field will not force 
                              the external alignment of the structure to that boundary. 
                              If an unnamed, zero-length bit field has a stricter alignment than 
                              the external alignment, there is no guarantee that the stricter 
                              alignment will be maintained when the structure or union gets 
                              allocated to memory.
                           </li>
                        </ul>
                        <p class="p">The following figures contain examples of bit fields. 
                           Figure 1 shows the byte offsets (upper corners) and the bit numbers
                           (lower corners) that are used in the examples. 
                           The remaining figures show different bit-field examples.
                        </p>
                        <div class="fig fignone" id="bit-fields__bitfields1"><a name="bit-fields__bitfields1" shape="rect">
                              <!-- --></a><span class="figcap">Figure 1. Bit Numbering</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/bitfields1.png"></img></div><br clear="none"></br></div>
                        <div class="fig fignone" id="bit-fields__bitfields2"><a name="bit-fields__bitfields2" shape="rect">
                              <!-- --></a><span class="figcap">Figure 2. Bit-field Allocation</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/bitfields2.png"></img></div><br clear="none"></br></div>
                        <div class="fig fignone" id="bit-fields__bitfields3"><a name="bit-fields__bitfields3" shape="rect">
                              <!-- --></a><span class="figcap">Figure 3. Boundary Alignment</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/bitfields3.png"></img></div><br clear="none"></br></div>
                        <div class="fig fignone" id="bit-fields__bitfields4"><a name="bit-fields__bitfields4" shape="rect">
                              <!-- --></a><span class="figcap">Figure 4. Storage Unit Sharing</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/bitfields4.png"></img></div><br clear="none"></br></div>
                        <div class="fig fignone" id="bit-fields__bitfields5"><a name="bit-fields__bitfields5" shape="rect">
                              <!-- --></a><span class="figcap">Figure 5. Union Allocation</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/bitfields5.png"></img></div><br clear="none"></br></div>
                        <div class="fig fignone" id="bit-fields__bitfields6"><a name="bit-fields__bitfields6" shape="rect">
                              <!-- --></a><span class="figcap">Figure 6. Unnamed Bit Fields</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/bitfields6.png"></img></div><br clear="none"></br></div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="textures-surfaces-samplers"><a name="textures-surfaces-samplers" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#textures-surfaces-samplers" name="textures-surfaces-samplers" shape="rect">2.4.&nbsp;Texture, Sampler, and Surface Types</a></h3>
                     <div class="body conbody">
                        <p class="p">Texture, sampler and surface types are used to define references 
                           to texture and surface memory. The CUDA architecture provides 
                           hardware and instructions to efficiently read data from texture 
                           or surface memory as opposed to global memory.
                        </p>
                        <p class="p">References to textures are bound through runtime functions to 
                           device read-only regions of memory, called a texture memory, 
                           before they can be used by a kernel. 
                           A texture reference has several attributes e.g. normalized mode, 
                           addressing mode, and texture filtering etc. 
                           A sampler reference can be used to sample a texture when read in 
                           a kernel. A surface reference is used to read or write data 
                           from and to the surface memory. 
                           It also has various attributes similar to a texture.
                        </p>
                        <p class="p">At the PTX level objects that access texture or surface memory are 
                           referred to as opaque objects. Textures are expressed by either a 
                           .texref or .samplerref type and surfaces are expressed by the 
                           .surfref type. 
                           The data of opaque objects can be accessed by specific instructions 
                           (TEX for .texref/.samplerref and SULD/SUST for .surfref). 
                           The attributes of opaque objects are implemented by allocating a 
                           descriptor in memory which is populated by the driver. 
                           PTX TXQ/SUQ instructions get translated into memory reads of fields 
                           of the descriptor. 
                           The internal format of the descriptor varies with each architecture 
                           and should not be relied on by the user. 
                           The data and the attributes of an opaque object may be accessed 
                           directly if the texture or surface reference is known at compile time 
                           or indirectly. If the reference is not known during compile time 
                           all information required to read data and attributes is contained in 
                           a .b64 value called the handle. 
                           The handle can be used to pass and return oqaque object references 
                           to and from functions as well as to reference external textures, 
                           samplers and surfaces.
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="function-calling-sequence"><a name="function-calling-sequence" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#function-calling-sequence" name="function-calling-sequence" shape="rect">3.&nbsp;Function Calling Sequence</a></h2>
                  <div class="body conbody">
                     <p class="p">This section describes the PTX-level function calling sequence, 
                        including register usage, stack-frame layout, and parameter passing. 
                        The PTX-level function calling sequence describes what gets represented 
                        in PTX to enable function calls. There is an abstraction at this level. 
                        Most of the details associated with the function calling sequence are 
                        handled at the SASS level.
                     </p>
                     <p class="p">PTX versions earlier than 2.0 do not conform to the ABI defined 
                        in this document, and cannot perform ABI compatible function calls. 
                        For the calling convention to work PTX version 2.0 or greater 
                        must be used.
                     </p>
                  </div>
                  <div class="topic concept nested1" id="registers"><a name="registers" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#registers" name="registers" shape="rect">3.1.&nbsp;Registers</a></h3>
                     <div class="body conbody">
                        <p class="p">At the PTX level, the registers that are specified are virtual. 
                           Register allocation occurs during PTX-to-SASS translation. 
                           The PTX-to-SASS translation also converts parameters and return values 
                           to physical registers or stack locations.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="stack-frame"><a name="stack-frame" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#stack-frame" name="stack-frame" shape="rect">3.2.&nbsp;Stack Frame</a></h3>
                     <div class="body conbody">
                        <p class="p">The PTX level has no concept of the software stack. 
                           Manipulation of the stack is completely defined at the SASS level, 
                           and gets allocated during the PTX-to-SASS translation process.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="parameter-passing"><a name="parameter-passing" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#parameter-passing" name="parameter-passing" shape="rect">3.3.&nbsp;Parameter Passing</a></h3>
                     <div class="body conbody">
                        <p class="p">At the PTX level, all parameters and return values present in a 
                           device function use the parameter state space (.param). 
                           The below table contains the rules for handling parameters and 
                           return values that are defined at the source level. 
                           For each source-level type, the corresponding PTX-level type 
                           that should be used is provided.
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="32%" id="d54e762" rowspan="1" colspan="1">Source Type</th>
                                    <th class="entry" valign="top" width="32%" id="d54e765" rowspan="1" colspan="1">Size in Bits</th>
                                    <th class="entry" valign="top" width="36%" id="d54e768" rowspan="1" colspan="1">PTX Type</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Integral types</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">8 to 32 (A)</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.u32 (if unsigned) or .s32 (if signed)</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Integral types</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.u64 (if unsigned) or .s64 (if signed)</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Pointers (B)</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">32</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.u32</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Pointers (B)</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.u64</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Floating-point types (C)</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">32</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.f32</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Floating-point types (C)</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.f64</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Aggregates or unions</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">Any size</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">
                                       <p class="lines">.align <samp class="ph codeph">align</samp> .b8 <samp class="ph codeph">name</samp>[<samp class="ph codeph">size</samp>]
                                       </p>
                                       
                                       Where <samp class="ph codeph">align</samp> is overall aggregate-or-union 
                                       alignment in bytes (D), 
                                       <samp class="ph codeph">name</samp> is variable name associated with aggregate 
                                       or union, 
                                       and <samp class="ph codeph">size</samp> is the aggregate-or-union size in bytes.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="32%" headers="d54e762" rowspan="1" colspan="1">Handles (E)</td>
                                    <td class="entry" valign="top" width="32%" headers="d54e765" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="36%" headers="d54e768" rowspan="1" colspan="1">.b64 (assigned from .texref, .sampleref, .surfref)</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">NOTES:</p>
                        <p class="p">(A)	Values shorter than 32-bits are sign extended or zero extended, depending on whether they are signed or unsigned types.</p>
                        <p class="p">(B)	Unless the memory type is specified in the function declaration, all pointers passed at the PTX level must use a generic
                           address.
                        </p>
                        <p class="p">(C)	16-bit floating-point types are only used for storage. Therefore, they cannot be used for parameters or return values.</p>
                        <p class="p">(D)	The alignment must be 1, 2, 4, 8, 16, 32, 64, or 128 bytes.</p>
                        <p class="p">(E)	The PTX built-in opaque types such as texture, sampler, and surface types are can be passed into functions as parameters
                           and be returned by them through 64-bit handles. The handle contains the necessary information to access the actual data from
                           the texture or surface memory as well as the attributes of the object stored in its type descriptor. See section <a class="xref" href="index.html#textures-surfaces-samplers" shape="rect">Texture, Sampler, and Surface Types</a> for more information on handles.
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="system-calls"><a name="system-calls" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#system-calls" name="system-calls" shape="rect">4.&nbsp;System Calls</a></h2>
                  <div class="body conbody">
                     <p class="p">System calls are calls into the driver operating system code. 
                        In PTX they look like regular calls, but the function definition 
                        is not given. A prototype must be provided in the PTX file, 
                        but the implementation of the function is provided by the driver.
                     </p>
                     <p class="p">The prototype for the vprintf system call is:</p><pre xml:space="preserve">
 .extern .func (.param .s32 status) vprintf (.param t1 format, .param t2 valist)
    </pre><div class="p">The following are the definitions for the vprintf parameters 
                        and return value.
                        
                        <ul class="ul">
                           <li class="li">status :  The status value that is returned by vprintf.</li>
                           <li class="li">format :  A pointer to the format specifier input. 
                              For 32-bit addresses, type t1 is .b32. 
                              For 64-bit addresses, type t1 is .b64.
                           </li>
                           <li class="li">valist :  A pointer to the valist input. 
                              For 32-bit addresses, type t2 is .b32. 
                              For 64-bit addresses, type t2 is .b64.
                           </li>
                        </ul>
                     </div>
                     <p class="p">A call to vprintf using 32-bit addresses looks like:</p><pre xml:space="preserve">
    cvta.global.b32    %r2, _fmt;
    st.param.b32  [param0], %r2;
    cvta.local.b32  %r3, _valist_array;
    st.param.b32  [param1], %r3;
    call.uni (_), vprintf, (param0, param1);
    </pre><p class="p">For this code, _fmt is the format string in global memory, 
                        and _valist_array is the valist of arguments. 
                        Note that any pointers must be converted to generic space. 
                        The vprintf syscall is emitted as part of the printf function 
                        defined in "stdio.h".
                     </p>
                     <p class="p">The prototype for the malloc system call is:</p><pre xml:space="preserve">
    .extern .func (.param t1 ptr) malloc (.param t2 size)
    </pre><div class="p">The following are the definitions for the malloc parameters 
                        and return value.
                        
                        <ul class="ul">
                           <li class="li">ptr :  The pointer to the memory that was allocated by malloc. 
                              For 32-bit addresses, type t1 is .b32. 
                              For 64-bit addresses, type t1 is .b64.
                           </li>
                           <li class="li">size :  The size of memory needed from malloc. 
                              This size is defined by the type size_t. 
                              When size_t is 32 bits, type t2 is .b32. 
                              When size_t is 64 bits, type t2 is .b64.
                           </li>
                        </ul>
                     </div>
                     <p class="p">The prototype for the free system call is:</p><pre xml:space="preserve">
     .extern .func free (.param t1 ptr)
     </pre><div class="p">The following is the definition for the free parameter.
                        
                        <ul class="ul">
                           <li class="li">ptr :  The pointer to the memory that should be freed. 
                              For 32-bit addresses, type t1 is .b32. 
                              For 64-bit addresses, type t1 is .b64.
                           </li>
                        </ul>
                     </div>
                     <p class="p">The malloc and free system calls are emitted as part of the 
                        malloc and free functions defined in "malloc.h".
                     </p>
                     <p class="p">In order to support assert, 
                        the PTX function call __assertfail is used whenever the 
                        assert expression produces a false value. 
                        The prototype for the __assertfail system call is:
                     </p><pre xml:space="preserve">
        .extern .func __assertfail (.param t1 message, .param t1 file, .param .b32 line, .param t1 function, .param t2 charSize)
        </pre><div class="p">The following are the definitions for the __assertfail parameters.
                        
                        <ul class="ul">
                           <li class="li">message :  The pointer to the string that should be output. 
                              For 32-bit addresses, type t1 is .b32. 
                              For 64-bit addresses, type t1 is .b64.
                           </li>
                           <li class="li">file :  The pointer to the file name string associated with 
                              the assert. For 32-bit addresses, type t1 is .b32. 
                              For 64-bit addresses, type t1 is .b64.
                           </li>
                           <li class="li">line :  The line number associated with the assert.</li>
                           <li class="li">function :  The pointer to the function name string 
                              associated with the assert. 
                              For 32-bit addresses, type t1 is .b32. 
                              For 64-bit addresses, type t1 is .b64.
                           </li>
                           <li class="li">charSize :  The size in bytes of the characters contained in 
                              the __assertfail parameter strings. 
                              The only supported character size is 1. 
                              The character size is defined by the type size_t. 
                              When size_t is 32 bits, type t2 is .b32. 
                              When size_t is 64 bits, type t2 is .b64.
                           </li>
                        </ul>
                     </div>
                     <p class="p">The __assertfail system call is emitted as part of the 
                        assert macro defined in "assert.h".
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="debug-information"><a name="debug-information" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#debug-information" name="debug-information" shape="rect">5.&nbsp;Debug Information</a></h2>
                  <div class="body conbody">
                     <p class="p">Debug information is encoded in DWARF 
                        (Debug With Arbitrary Record Format).
                     </p>
                  </div>
                  <div class="topic concept nested1" id="generation-of-debug"><a name="generation-of-debug" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#generation-of-debug" name="generation-of-debug" shape="rect">5.1.&nbsp;Generation of Debug Information</a></h3>
                     <div class="body conbody">
                        <p class="p">The responsibility for generating debug information is split between 
                           the PTX producer and the PTX-to-SASS backend. 
                           The PTX producer is responsible for emitting binary DWARF into 
                           the PTX file, using the .section and .b8-.b16-.b32-and-.b64 directives 
                           in PTX.  This should contain the .debug_info and .debug_abbrev sections,
                           and possibly optional sections .debug_pubnames and .debug_aranges. 
                           These sections are standard DWARF2 sections that refer to labels 
                           and registers in the PTX.
                        </p>
                        <p class="p">The PTX-to-SASS backend is responsible for generating the 
                           .debug_line section from the .file and .loc directives in the PTX file.
                           This section maps source lines to SASS addresses. 
                           The backend also generates the .debug_frame section.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="cuda-specific-dwarf"><a name="cuda-specific-dwarf" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#cuda-specific-dwarf" name="cuda-specific-dwarf" shape="rect">5.2.&nbsp;CUDA-Specific DWARF Definitions</a></h3>
                     <div class="body conbody">
                        <p class="p">In order to support debugging of multiple memory segments, address 
                           class codes are defined to reflect the memory space of variables. 
                           The address-class values are emitted as the DW_AT_address_class 
                           attribute for all variable and parameter Debugging Information Entries.
                           The address class codes are defined in the below table.
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="44.44444444444444%" id="d54e1078" rowspan="1" colspan="1">Code</th>
                                    <th class="entry" valign="top" width="11.11111111111111%" id="d54e1081" rowspan="1" colspan="1">Value</th>
                                    <th class="entry" valign="top" width="44.44444444444444%" id="d54e1084" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_code_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Code storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_reg_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">2</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Register storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_sreg_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">3</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Special register storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_const_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Constant storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_global_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">5</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Global storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_local_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">6</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Local storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_param_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">7</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Parameter storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_shared_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">8</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Shared storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_surf_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">9</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Surface storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_tex_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">10</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Texture storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_tex_sampler_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">11</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Texture sampler storage</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1078" rowspan="1" colspan="1">ADDR_generic_space</td>
                                    <td class="entry" valign="top" width="11.11111111111111%" headers="d54e1081" rowspan="1" colspan="1">12</td>
                                    <td class="entry" valign="top" width="44.44444444444444%" headers="d54e1084" rowspan="1" colspan="1">Generic-address storage</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="example"><a name="example" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#example" name="example" shape="rect">6.&nbsp;Example</a></h2>
                  <div class="body conbody">
                     <p class="p">The following is example PTX with debug information 
                        for implementing the following program that makes a call:
                     </p><pre xml:space="preserve">
__device__ __noinline__ int foo (int i, int j)
{
  return i+j;
}

__global__ void test (int *p)
{
  *p = foo(1, 2);
}
    </pre><p class="p">The resulting PTX would be something like:</p><pre xml:space="preserve">
.version 4.2
.target sm_20, debug
.address_size 64

 .file   1 "call_example.cu"

.visible .func  (.param .b32 func_retval0) // return value
_Z3fooii(
        .param .b32 _Z3fooii_param_0, // parameter "i"
        .param .b32 _Z3fooii_param_1) // parameter "j"
{
        .reg .s32       %r&lt;4&gt;;
        .loc 1 1 1      // following instructions are for line 1

func_begin0:
        ld.param.u32    %r1, [_Z3fooii_param_0]; // load 1st param
        ld.param.u32    %r2, [_Z3fooii_param_1]; // load 2nd param
        .loc    1 3 1   // following instructions are for line 3
        add.s32         %r3, %r1, %r2;
        st.param.b32    [func_retval0+0], %r3; // store return value
        ret;
func_end0:
}

.visible .entry _Z4testPi(
        .param .u64 _Z4testPi_param_0) // parameter *p
{
        .reg .s32       %r&lt;4&gt;;
        .reg .s64       %rd&lt;2&gt;;
        .loc 1 6 1

func_begin1:
        ld.param.u64    %rd1, [_Z4testPi_param_0]; // load *p
        mov.u32         %r1, 1;
        mov.u32         %r2, 2;
        .loc    1 8 9
        .param .b32 param0;
        st.param.b32    [param0+0], %r1; // store 1
        .param .b32 param1;
        st.param.b32    [param1+0], %r2; // store 2
        .param .b32 retval0;
        call.uni (retval0), _Z3fooii, ( param0, param1); // call foo
        ld.param.b32    %r3, [retval0+0]; // get return value
        st.u32  [%rd1], %r3;              // *p = return value
        .loc    1 9 2
        ret;
func_end1:
}
    </pre><p class="p"></p><pre xml:space="preserve">
.section .debug_info {
 .b32 262
 .b8 2, 0
 .b32 .debug_abbrev
 .b8 8, 1, 108, 103, 101, 110, 102, 101, 58, 32, 69, 68, 71, 32, 52, 46, 57
 .b8 0, 4, 99, 97, 108, 108, 49, 46, 99, 117, 0
 .b64 0
 .b32 .debug_line // the .debug_line section will be created by ptxas from the .loc
 .b8 47, 104, 111, 109, 101, 47, 109, 109, 117, 114, 112, 104, 121, 47, 116
 .b8 101, 115, 116, 0, 2, 95, 90, 51, 102, 111, 111, 105, 105, 0, 95, 90
 .b8 51, 102, 111, 111, 105, 105, 0
 .b32 1, 1, 164
 .b8 1
 .b64 func_begin0 // start and end location of foo
 .b64 func_end0
 .b8 1, 156, 3, 105, 0
 .b32 1, 1, 164
 .b8 5, 144, 177, 228, 149, 1, 2, 3, 106, 0
 .b32 1, 1, 164
 .b8 5, 144, 178, 228, 149, 1, 2, 0, 4, 105, 110, 116, 0, 5
 .b32 4
 .b8 2, 95, 90, 52, 116, 101, 115, 116, 80, 105, 0, 95, 90, 52, 116, 101
 .b8 115, 116, 80, 105, 0
 .b32 1, 6, 253
 .b8 1
 .b64 func_begin1 // start and end location of test
 .b64 func_end1
 .b8 1, 156, 3, 112, 0
 .b32 1, 6, 259
 .b8 9, 3
 .b64 _Z4testPi_param_0
 .b8 7, 0, 5, 118, 111, 105, 100, 0, 6
 .b32 164
 .b8 12, 0
}
.section .debug_abbrev {
 .b8 1, 17, 1, 37, 8, 19, 11, 3, 8, 17, 1, 16, 6, 27, 8, 0, 0, 2, 46, 1, 135
 .b8 64, 8, 3, 8, 58, 6, 59, 6, 73, 19, 63, 12, 17, 1, 18, 1, 64, 10, 0, 0
 .b8 3, 5, 0, 3, 8, 58, 6, 59, 6, 73, 19, 2, 10, 51, 11, 0, 0, 4, 36, 0, 3
 .b8 8, 62, 11, 11, 6, 0, 0, 5, 59, 0, 3, 8, 0, 0, 6, 15, 0, 73, 19, 51, 11
 .b8 0, 0, 0
}
.section .debug_pubnames {
 .b32 41
 .b8 2, 0
 .b32 .debug_info
 .b32 262, 69
 .b8 95, 90, 51, 102, 111, 111, 105, 105, 0
 .b32 174
 .b8 95, 90, 52, 116, 101, 115, 116, 80, 105, 0
 .b32 0
}
</pre></div>
               </div>
               <div class="topic concept nested0" id="cxx"><a name="cxx" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#cxx" name="cxx" shape="rect">7.&nbsp;C++</a></h2>
                  <div class="body conbody">
                     <p class="p">The C++ implementation for device functions follows the Itanium C++ ABI.
                        However, not everything in C++ is supported.  
                        In particular, the following are not supported in device code.
                     </p>
                     <ul class="ul">
                        <li class="li">Exceptions and try/catch blocks</li>
                        <li class="li">RTTI</li>
                        <li class="li">STL library</li>
                        <li class="li">Global constructors and destructors</li>
                        <li class="li">Virtual functions and classes across host and device 
                           (i.e., vtables cannot be used across host and device)
                        </li>
                     </ul>
                     <p class="p">There are also a few C features that are not currently supported:</p>
                     <ul class="ul">
                        <li class="li">stdio other than printf</li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication of otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright-past-to-present" name="copyright-past-to-present" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2007</span>-<span class="ph">2019</span> NVIDIA
                              Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script><script type="text/javascript">_satellite.pageBottom();</script></body>
</html>