Sophie

Sophie

distrib > Mageia > 7 > x86_64 > media > nonfree-updates > by-pkgid > b86a85131cc739c1c53d0b55840a4328 > files > 3630

nvidia-cuda-toolkit-devel-10.1.168-1.2.mga7.nonfree.x86_64.rpm

<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="NVVM IR Specification 1.5"></meta>
      <meta name="abstract" content="Reference guide to the NVVM compiler IR (internal representation) based on the LVVM IR."></meta>
      <meta name="description" content="Reference guide to the NVVM compiler IR (internal representation) based on the LVVM IR."></meta>
      <meta name="DC.Coverage" content="Compiler SDK"></meta>
      <meta name="DC.subject" content="NVVM, NVVM IR, NVVM compiler, NVVM compiler IR, NVVM IR specification, NVVM specification, NVVM LLVM IR, NVVM CUDA, NVVM CUDA C compiler, NVVM GPU"></meta>
      <meta name="keywords" content="NVVM, NVVM IR, NVVM compiler, NVVM compiler IR, NVVM IR specification, NVVM specification, NVVM LLVM IR, NVVM CUDA, NVVM CUDA C compiler, NVVM GPU"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>NVVM IR :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit 
                  
                  
                  v10.1.168</a></div>
            <div class="category"><a href="index.html" title="NVVM IR">NVVM IR</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#identifiers">2.&nbsp;Identifiers</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#high-level-structure">3.&nbsp;High Level Structure</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#linkage-types">3.1.&nbsp;Linkage Types</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#calling-conventions">3.2.&nbsp;Calling Conventions</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#rules-and-restrictions">3.2.1.&nbsp;Rules and Restrictions</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#visibility-styles">3.3.&nbsp;Visibility Styles</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#dll-storage-classes">3.4.&nbsp;DLL Storage Classes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#threadlocal-storage-models">3.5.&nbsp;Thread Local Storage Models</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#comdats">3.6.&nbsp;Comdats</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#named-types">3.7.&nbsp;Named Types</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#source-filename">3.8.&nbsp;source_filename</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#global-variables">3.9.&nbsp;Global Variables</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#functions">3.10.&nbsp;Functions</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#aliases">3.11.&nbsp;Aliases</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#ifuncs">3.12.&nbsp;Ifuncs</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#named-metadata">3.13.&nbsp;Named Metadata</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#parameter-attributes">3.14.&nbsp;Parameter Attributes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#garbage-collector-names">3.15.&nbsp;Garbage Collector Names</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#prefix-data">3.16.&nbsp;Prefix Data</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#attribute-groups">3.17.&nbsp;Attribute Groups</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#function-attributes">3.18.&nbsp;Function Attributes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#global-attributes">3.19.&nbsp;Global Attributes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#unique_1842295346">3.20.&nbsp;Operand Bundles</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#module-level-inline-assembly">3.21.&nbsp;Module-Level Inline Assembly</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#data-layout">3.22.&nbsp;Data Layout</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#target-triple">3.23.&nbsp;Target Triple</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#pointer-aliasing-rules">3.24.&nbsp;Pointer Aliasing Rules</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#volatile-memory-access">3.25.&nbsp;Volatile Memory Access</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#memory-model-for-concurrent-operations">3.26.&nbsp;Memory Model for Concurrent Operations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#atomic-memory-ordering-constraints">3.27.&nbsp;Atomic Memory Ordering Constraints</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#unique_1546694063">3.28.&nbsp;Fast Math Flags</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#uselist-order-directives">3.29.&nbsp;Use-list Order Directives</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#type-system">4.&nbsp;Type System</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#constants">5.&nbsp;Constants</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#other-values">6.&nbsp;Other Values</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#inline-assembler-expressions">6.1.&nbsp;Inline Assembler Expressions</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#metadata-nodes-and-metadata-strings">6.2.&nbsp;Metadata Nodes and Metadata Strings</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#intrinsic-global-variables">7.&nbsp;Intrinsic Global Variables</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#instructions">8.&nbsp;Instructions</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#terminator-instructions">8.1.&nbsp;Terminator Instructions</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#binary-operations">8.2.&nbsp;Binary Operations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#bitwise-binary-operations">8.3.&nbsp;Bitwise Binary Operations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#vector-operations">8.4.&nbsp;Vector Operations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#aggregate-operations">8.5.&nbsp;Aggregate Operations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#memory-access-and-addressing-operations">8.6.&nbsp;Memory Access and Addressing Operations</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#alloca-instruction">8.6.1.&nbsp;alloca Instruction</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#load-instruction">8.6.2.&nbsp;load Instruction</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#store-instruction">8.6.3.&nbsp;store Instruction</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#fence-instruction">8.6.4.&nbsp;fence Instruction</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cmpxchg-instruction">8.6.5.&nbsp;cmpxchg Instruction</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#atomicrmw-instruction">8.6.6.&nbsp;atomicrmw Instruction</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#unique_894469802">8.6.7.&nbsp;getelementptr Instruction</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#conversion-operations">8.7.&nbsp;Conversion Operations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#other-operations">8.8.&nbsp;Other Operations</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#intrinsic-functions">9.&nbsp;Intrinsic Functions</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#variable-argument-handling-intrinsics">9.1.&nbsp;Variable Argument Handling Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#accurate-garbage-collection-intrinics">9.2.&nbsp;Accurate Garbage Collection Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#code-generator-intrinics">9.3.&nbsp;Code Generator Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#standard-c-library-intrinics">9.4.&nbsp;Standard C Library Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#bit-manipulations-intrinics">9.5.&nbsp;Bit Manipulations Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#unique_1886512331">9.6.&nbsp;Specialised Arithmetic Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#half-precision-floating-point-intrinsics">9.7.&nbsp;Half Precision Floating Point Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#debugger-intrinsics">9.8.&nbsp;Debugger Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#exception-handling-intrinsics">9.9.&nbsp;Exception Handling Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#trampoline-intrinsics">9.10.&nbsp;Trampoline Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#unique_1741578225">9.11.&nbsp;Masked Vector Load and Store Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#vector-reduce-intrinsics">9.12.&nbsp;Vector Reduction Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#constrained-float-intrinsics">9.13.&nbsp;Constrained Floating Point Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#constrained-libm-intrinsics">9.14.&nbsp;Constrained libm-equivalent Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#unique_420250914">9.15.&nbsp;Masked Vector Gather and Scatter Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#memory-use-markers">9.16.&nbsp;Memory Use Markers</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#general-intrinsics">9.17.&nbsp;General Intrinsics</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#elementwise-atomic-memory-intrinics">9.18.&nbsp;Element Wise Atomic Memory Intrinsics</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#address-space">10.&nbsp;Address Space</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#address-spaces">10.1.&nbsp;Address Spaces</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#generic-pointers-and-non-generic-pointers">10.2.&nbsp;Generic Pointers and Non-Generic Pointers</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#generic-pointers-vs-non-generic-pointers">10.2.1.&nbsp;Generic Pointers vs. Non-generic Pointers</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#conversion">10.2.2.&nbsp;Conversion</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#no-aliasing-between-two-different-specific-address-spaces">10.2.3.&nbsp;No Aliasing between Two Different Specific Address Spaces</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#the-alloca-instruction">10.3.&nbsp;The alloca Instruction</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#global-property-annotation-chapter-11">11.&nbsp;Global Property Annotation</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#overview">11.1.&nbsp;Overview</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#representation-of-properties">11.2.&nbsp;Representation of Properties</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-properties">11.3.&nbsp;Supported Properties</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#texture-and-surface">12.&nbsp;Texture and Surface</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#texture-variable-and-surface-variable">12.1.&nbsp;Texture Variable and Surface Variable</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#accessing-texture-memory-or-surface-memory">12.2.&nbsp;Accessing Texture Memory or Surface Memory</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#nvvm-specific-intrinsic-functions">13.&nbsp;NVVM Specific Intrinsic Functions</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#nvvm-intrin-atomic">13.1.&nbsp;Atomic</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvvm-intrin-barrier">13.2.&nbsp;Barrier and Memory Fence</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvvm-intrin-addrsp">13.3.&nbsp;Address space conversion</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvvm-intrin-spreg">13.4.&nbsp;Special Registers</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvvm-intrin-texture-surface">13.5.&nbsp;Texture/Surface Access</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#nvvm-intrin-texture">13.5.1.&nbsp;Texture Reads</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvvm-intrin-surface-load">13.5.2.&nbsp;Surface Loads</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#unique_1950908514">13.5.3.&nbsp;Surface Stores</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvvm-intrin-warp-level">13.6.&nbsp;Warp-level Operations</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#nvvm-intrin-warp-level-sync">13.6.1.&nbsp;Barrier Synchronization</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvvm-intrin-warp-level-datamove">13.6.2.&nbsp;Data Movement</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvvm-intrin-warp-level-vote">13.6.3.&nbsp;Vote</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#unique_389030136">13.6.4.&nbsp;Match</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#nvvm-intrin-warp-level-matrix">13.6.5.&nbsp;Matrix Operation</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#nvvm-intrin-warp-level-matrix-ld">13.6.5.1.&nbsp;Load Fragments</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#nvvm-intrin-warp-level-matrix-st">13.6.5.2.&nbsp;Store Fragments</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#nvvm-intrin-warp-level-matrix-mma">13.6.5.3.&nbsp;Matrix Multiply-and-Accumulate</a></div>
                                 </li>
                              </ul>
                           </li>
                        </ul>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#source-level-debugging-support">14.&nbsp;Source Level Debugging Support</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#nvvm-abi-for-ptx">15.&nbsp;NVVM ABI for PTX</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#linkage-types-nvvm">15.1.&nbsp;Linkage Types</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#argument-passing-and-return">15.2.&nbsp;Parameter Passing and Return</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#revision-history">A.&nbsp;Revision History</a></div>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">NVVM IR
                  (<a href="../../pdf/NVVM_IR_Specification.pdf">PDF</a>)
                  -
                   
                  
                  
                  v10.1.168
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated April 24, 2019
                  -
                  <a href="mailto:CUDAIssues@nvidia.com?subject=CUDA Toolkit Documentation Feedback: NVVM IR">Send Feedback</a></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#abstract" name="abstract" shape="rect">NVVM IR Specification 1.5</a></h2>
                  <div class="body conbody">
                     <p class="shortdesc">Reference guide to the NVVM compiler IR (internal representation)
                        based on the LVVM IR.
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#introduction" name="introduction" shape="rect">1.&nbsp;Introduction</a></h2>
                  <div class="body conbody">
                     <p class="p">NVVM IR is a compiler IR (internal representation) based on the LLVM IR. The NVVM IR is
                        designed to represent GPU compute kernels (for example, CUDA kernels). High-level
                        language front-ends, like the CUDA C compiler front-end, can generate NVVM IR. The NVVM
                        compiler (which is based on LLVM) generates PTX code from NVVM IR.
                     </p>
                     <p class="p">NVVM IR and NVVM compilers are mostly agnostic about the source language being used. The
                        PTX codegen part of a NVVM compiler needs to know the source language because of the
                        difference in DCI (driver/compiler interface). 
                     </p>
                     <p class="p"> NVVM IR is a binary format and is based on a subset of LLVM IR
                        bitcode format. This document uses only human-readable form to
                        describe NVVM IR.
                        
                     </p>
                     <p class="p">Technically speaking, NVVM IR is LLVM IR with a set of rules, restrictions, and
                        conventions, plus a set of supported intrinsic functions. A program specified in NVVM IR
                        is always a legal LLVM program. A legal LLVM program may not be a legal NVVM program. 
                     </p>
                     <p class="p">There are three levels of support for NVVM IR.</p>
                     <ul class="ul">
                        <li class="li">Supported: The feature is fully supported. Most IR features should fall into this
                           category.
                        </li>
                        <li class="li">Accepted and ignored: The NVVM compiler will accept this IR feature, but will ignore
                           the required semantics. This applies to some IR features that do not have meaningful
                           semantics on GPUs and that can be ignored. Calling convention markings are an
                           example.
                        </li>
                        <li class="li">Illegal, not supported: The specified semantics is not supported, such as a
                           <samp class="ph codeph">va_arg</samp> function. Future versions of NVVM may either support or
                           accept and ignore IRs that are illegal in the current version.
                        </li>
                     </ul>
                     <p class="p">This document describes version 1.5 of the NVVM IR and
                        version 2.0 of the NVVM debug metadata (see <a class="xref" href="index.html#source-level-debugging-support" shape="rect">Source Level Debugging Support</a>).
                        
                     </p>
                     <p class="p">The current NVVM IR is based on LLVM 5.0. For the complete semantics of the IR, readers
                        of this document should check the official LLVM Language Reference Manual (<a class="xref" href="http://llvm.org/releases/5.0.0/docs/LangRef.html" target="_blank" shape="rect">http://llvm.org/releases/5.0.0/docs/LangRef.html</a>). The NVVM IR version 1.2 and the NVVM debug metadata
                        version 2.0 are based on LLVM 3.4.
                        The Language Reference Manual for LLVM 3.4 is at <a class="xref" href="http://llvm.org/releases/3.4/docs/LangRef.html" target="_blank" shape="rect">http://llvm.org/releases/3.4/docs/LangRef.html</a>.
                        
                     </p>
                     <p class="p">Support for the following compute architectures are provided with this release of the
                        NVVM IR specification:
                     </p>
                     <ul class="ul">
                        <li class="li"><samp class="ph codeph">compute_30</samp></li>
                        <li class="li"><samp class="ph codeph">compute_32</samp></li>
                        <li class="li"><samp class="ph codeph">compute_35</samp></li>
                        <li class="li"><samp class="ph codeph">compute_37</samp></li>
                        <li class="li"><samp class="ph codeph">compute_50</samp></li>
                        <li class="li"><samp class="ph codeph">compute_52</samp></li>
                        <li class="li"><samp class="ph codeph">compute_53</samp></li>
                        <li class="li"><samp class="ph codeph">compute_60</samp></li>
                        <li class="li"><samp class="ph codeph">compute_61</samp></li>
                        <li class="li"><samp class="ph codeph">compute_62</samp></li>
                        <li class="li"><samp class="ph codeph">compute_70</samp></li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="identifiers"><a name="identifiers" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#identifiers" name="identifiers" shape="rect">2.&nbsp;Identifiers</a></h2>
                  <div class="body conbody">
                     <p class="p">The name of a named global identifier must have the form:</p>
                     <p class="p"><samp class="ph codeph">@[a-zA-Z$_][a-zA-Z$_0-9]*</samp></p>
                     <p class="p">Note that it cannot contain the . character.</p>
                     <p class="p"><samp class="ph codeph">[@%]llvm.nvvm.*</samp> and <samp class="ph codeph">[@%]nvvm.*</samp> are reserved words.
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="high-level-structure"><a name="high-level-structure" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#high-level-structure" name="high-level-structure" shape="rect">3.&nbsp;High Level Structure</a></h2>
                  <div class="topic concept nested1" id="linkage-types"><a name="linkage-types" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#linkage-types" name="linkage-types" shape="rect">3.1.&nbsp;Linkage Types</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">private</samp></li>
                           <li class="li"><samp class="ph codeph">internal</samp></li>
                           <li class="li"><samp class="ph codeph">available_externally</samp></li>
                           <li class="li"><samp class="ph codeph">linkonce</samp></li>
                           <li class="li"><samp class="ph codeph">weak</samp></li>
                           <li class="li"><samp class="ph codeph">common</samp></li>
                           <li class="li"><samp class="ph codeph">linkonce_odr</samp></li>
                           <li class="li"><samp class="ph codeph">weak_odr</samp></li>
                           <li class="li"><samp class="ph codeph">external</samp></li>
                        </ul>
                        <p class="p">Not supported:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">linker_private</samp></li>
                           <li class="li"><samp class="ph codeph">linker_private_weak</samp></li>
                           <li class="li"><samp class="ph codeph">appending</samp></li>
                           <li class="li"><samp class="ph codeph">extern_weak</samp></li>
                           <li class="li"><samp class="ph codeph">dllimport</samp></li>
                           <li class="li"><samp class="ph codeph">dllexport</samp></li>
                        </ul>
                        <p class="p">See <a class="xref" href="index.html#nvvm-abi-for-ptx" shape="rect">NVVM ABI for PTX</a> for details on how linkage types are
                           translated to PTX.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="calling-conventions"><a name="calling-conventions" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#calling-conventions" name="calling-conventions" shape="rect">3.2.&nbsp;Calling Conventions</a></h3>
                     <div class="body conbody">
                        <p class="p">All LLVM calling convention markings are accepted and ignored. Functions and calls are
                           generated according to the PTX calling convention.
                        </p>
                     </div>
                     <div class="topic concept nested2" id="rules-and-restrictions"><a name="rules-and-restrictions" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#rules-and-restrictions" name="rules-and-restrictions" shape="rect">3.2.1.&nbsp;Rules and Restrictions</a></h3>
                        <div class="body conbody"><a name="rules-and-restrictions__ol_x5g_nr1_n3" shape="rect">
                              <!-- --></a><ol class="ol" id="rules-and-restrictions__ol_x5g_nr1_n3">
                              <li class="li"><samp class="ph codeph">va_arg</samp> is not supported.
                              </li>
                              <li class="li">When an argument with width less than 32-bit is passed, the <samp class="ph codeph">zeroext/signext</samp>
                                 parameter attribute should be set. <samp class="ph codeph">zeroext</samp> will be assumed if not set.
                              </li>
                              <li class="li">When a value with width less than 32-bit is returned, the
                                 <samp class="ph codeph">zeroext/signext</samp> parameter attribute should be set.
                                 <samp class="ph codeph">zeroext</samp> will be assumed if not set.
                              </li>
                              <li class="li">Arguments of aggregate or vector types that are passed by value can be passed by
                                 pointer with the <samp class="ph codeph">byval</samp> attribute set (referred to as the
                                 <samp class="ph codeph">by-pointer-byval</samp> case below). The align attribute must be
                                 set if the type requires a non-natural alignment (natural alignment is the alignment
                                 inferred for the aggregate type according to the <a class="xref" href="index.html#data-layout" shape="rect">Data Layout</a>
                                 section).
                              </li>
                              <li class="li">If a function has an argument of aggregate or vector type that is passed by value
                                 directly and the type has a non-natural alignment requirement, the alignment must be
                                 annotated by the global property annotation &lt;<samp class="ph codeph">align</samp>,
                                 alignment&gt;, where alignment is a 32-bit integer whose upper 16 bits
                                 represent the argument position (starting from 1) and the lower 16 bits represent
                                 the alignment.
                              </li>
                              <li class="li">If the return type of a function is an aggregate or a vector that has a non-natural
                                 alignment, then the alignment requirement must be annotated by the global property
                                 annotation &lt;<samp class="ph codeph">align</samp>, alignment&gt;, where the upper 16 bits is
                                 0, and the lower 16 bits represent the alignment. 
                              </li>
                              <li class="li">It is not required to annotate a function with &lt;<samp class="ph codeph">align</samp>,
                                 alignment&gt; otherwise. If annotated, the alignment must match the
                                 natural alignment or the align attribute in the <samp class="ph codeph">by-pointer-byval</samp>
                                 case.
                              </li>
                              <li class="li" id="rules-and-restrictions__item-8-calling-conventions"><a name="rules-and-restrictions__item-8-calling-conventions" shape="rect">
                                    <!-- --></a>For an indirect call instruction of a function that
                                 has a non-natural alignment for its return value or one of its arguments that is not
                                 expressed in alignment in the <samp class="ph codeph">by-pointer-byval</samp> case, the call
                                 instruction must have an attached metadata of kind <samp class="ph codeph">callalign</samp>. The
                                 metadata contains a sequence of <samp class="ph codeph">i32</samp> fields each of which represents
                                 a non-natural alignment requirement. The upper 16 bits of an <samp class="ph codeph">i32</samp>
                                 field represent the argument position (0 for return value, 1 for the first argument,
                                 and so on) and the lower 16 bits represent the alignment. The <samp class="ph codeph">i32</samp>
                                 fields must be sorted in the increasing order. 
                                 <p class="p">For
                                    example,
                                 </p><pre xml:space="preserve">
%call = call %struct.S %fp1(%struct.S* byval align 8 %arg1p, %struct.S %arg2),!callalign !10
!10 = metadata !{i32 8, i32 520};
</pre><p class="p"></p>
                              </li>
                              <li class="li">It is not required to have an <samp class="ph codeph">i32</samp> metadata field for the other
                                 arguments or the return value otherwise. If presented, the alignment must match the
                                 natural alignment or the align attribute in the <samp class="ph codeph">by-pointer-byval
                                    case</samp>.
                              </li>
                              <li class="li">It is not required to have a <samp class="ph codeph">callalign</samp> metadata attached to a
                                 direct call instruction. If attached, the alignment must match the natural alignment
                                 or the alignment in the <samp class="ph codeph">by-pointer-byval</samp> case.
                              </li>
                              <li class="li">The absence of the metadata in an indirect call instruction means using natural
                                 alignment or the align attribute in the <samp class="ph codeph">by-pointer-byval</samp> case.
                              </li>
                           </ol>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="visibility-styles"><a name="visibility-styles" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#visibility-styles" name="visibility-styles" shape="rect">3.3.&nbsp;Visibility Styles</a></h3>
                     <div class="body conbody">
                        <p class="p">All styles—default, hidden, and protected—are accepted and ignored.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="dll-storage-classes"><a name="dll-storage-classes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#dll-storage-classes" name="dll-storage-classes" shape="rect">3.4.&nbsp;DLL Storage Classes</a></h3>
                     <div class="body conbody">
                        <p class="p">DLL storage classes are not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="threadlocal-storage-models"><a name="threadlocal-storage-models" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#threadlocal-storage-models" name="threadlocal-storage-models" shape="rect">3.5.&nbsp;Thread Local Storage Models</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="comdats"><a name="comdats" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#comdats" name="comdats" shape="rect">3.6.&nbsp;Comdats</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="named-types"><a name="named-types" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#named-types" name="named-types" shape="rect">3.7.&nbsp;Named Types</a></h3>
                     <div class="body conbody">
                        <p class="p">Fully supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="source-filename"><a name="source-filename" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#source-filename" name="source-filename" shape="rect">3.8.&nbsp;source_filename</a></h3>
                     <div class="body conbody">
                        <p class="p">Accepted and ignored.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="global-variables"><a name="global-variables" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#global-variables" name="global-variables" shape="rect">3.9.&nbsp;Global Variables</a></h3>
                     <div class="body conbody">
                        <p class="p">A global variable, that is not an intrinsic global variable, may be optionally declared to reside
                           in one of the following address spaces: 
                        </p><a name="global-variables__ul_p4q_4cb_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="global-variables__ul_p4q_4cb_n3">
                           <li class="li"><samp class="ph codeph">global</samp></li>
                           <li class="li"><samp class="ph codeph">shared</samp></li>
                           <li class="li"><samp class="ph codeph">constant</samp></li>
                        </ul>
                        <p class="p">If no address space is explicitly specified, the global variable is assumed to reside in the 
                           <samp class="ph codeph">global</samp> address space with a generic address value. 
                           See <a class="xref" href="index.html#address-space" shape="rect">Address Space</a> for details.
                        </p>
                        <p class="p"><samp class="ph codeph">thread_local</samp> variables are not supported.
                        </p>
                        <p class="p">No explicit section (except for the metadata section) is allowed. </p>
                        <p class="p">Initializations of <samp class="ph codeph">shared</samp> variables are ignored. 
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="functions"><a name="functions" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#functions" name="functions" shape="rect">3.10.&nbsp;Functions</a></h3>
                     <div class="body conbody">
                        <p class="p">The following are not supported on functions:</p><a name="functions__ul_gtp_gdb_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="functions__ul_gtp_gdb_n3">
                           <li class="li">Alignment</li>
                           <li class="li">Explicit section</li>
                           <li class="li">Garbage collector name</li>
                           <li class="li">Prefix data</li>
                           <li class="li">Prologue</li>
                           <li class="li">Personality</li>
                           <li class="li">Optional list of attached metadata</li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="aliases"><a name="aliases" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#aliases" name="aliases" shape="rect">3.11.&nbsp;Aliases</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported only as aliases of non-kernel functions.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="ifuncs"><a name="ifuncs" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#ifuncs" name="ifuncs" shape="rect">3.12.&nbsp;Ifuncs</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="named-metadata"><a name="named-metadata" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#named-metadata" name="named-metadata" shape="rect">3.13.&nbsp;Named Metadata</a></h3>
                     <div class="body conbody">
                        <p class="p">Accepted and ignored, except for the following:</p><a name="named-metadata__ul_wzy_5db_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="named-metadata__ul_wzy_5db_n3">
                           <li class="li"><samp class="ph codeph">!nvvm.annotations</samp>: see <a class="xref" href="index.html#global-property-annotation-chapter-11" shape="rect">Global Property Annotation</a></li>
                           <li class="li"><samp class="ph codeph">!nvvmir.version</samp></li>
                           <li class="li"><samp class="ph codeph">!llvm.dbg.cu</samp></li>
                           <li class="li"><samp class="ph codeph">!llvm.module.flags</samp></li>
                        </ul>
                        <div class="p">The NVVM IR version is specified using a named metadata
                           called <samp class="ph codeph">!nvvmir.version</samp>. The
                           <samp class="ph codeph">!nvvmir.version</samp> named metadata may have
                           one metadata node that contains the NVVM IR version for that
                           module. If multiple such modules are linked together,
                           the named metadata in the linked module may have more than
                           one metadata node with each node containing a version.
                           A metadata node with NVVM IR version takes either of
                           the following forms:
                           
                           <ul class="ul">
                              <li class="li"> It may consist of two i32 values—the first denotes the
                                 NVVM IR major version number and the second denotes the
                                 minor version number. If absent, the version number is
                                 assumed to be 1.0, which can be specified as:
                                 <pre xml:space="preserve">!nvvmir.version = !{!0}
!0 = metadata !{ i32 1, i32 0}</pre></li>
                              <li class="li"> It may consist of four i32 values—the first two denote the
                                 NVVM IR major and minor versions respectively. The third value
                                 denotes the NVVM IR debug metadata major version number, and
                                 the fourth value denotes the corresponding minor version
                                 number. If absent, the version number is assumed to be 1.0,
                                 which can be specified as:
                                 <pre xml:space="preserve">!nvvmir.version = !{!0}
!0 = metadata !{ i32 1, i32 0, i32 1, i32 0}</pre></li>
                           </ul>
                           
                           The version of NVVM IR described in this document is 1.5. The version of NVVM IR debug metadata described in this document
                           is 2.0.
                           
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="parameter-attributes"><a name="parameter-attributes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#parameter-attributes" name="parameter-attributes" shape="rect">3.14.&nbsp;Parameter Attributes</a></h3>
                     <div class="body conbody">
                        <p class="p"> Fully supported, except the following:</p>
                        <p class="p"> Accepted and ignored:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">inreg</samp></li>
                           <li class="li"><samp class="ph codeph">nest</samp></li>
                           <li class="li"><samp class="ph codeph">nonnull</samp></li>
                           <li class="li"><samp class="ph codeph">dereferenceable(&lt;n&gt;)</samp></li>
                           <li class="li"><samp class="ph codeph">dereferenceable_or_null(&lt;n&gt;)</samp></li>
                        </ul>
                        <p class="p"> Not supported:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">inalloca</samp></li>
                           <li class="li"><samp class="ph codeph">swiftself</samp></li>
                           <li class="li"><samp class="ph codeph">swifterror</samp></li>
                           <li class="li"><samp class="ph codeph">align &lt;n&gt;</samp></li>
                        </ul>
                        <p class="p">See <a class="xref" href="index.html#calling-conventions" shape="rect">Calling Conventions</a> for the use of the attributes.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="garbage-collector-names"><a name="garbage-collector-names" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#garbage-collector-names" name="garbage-collector-names" shape="rect">3.15.&nbsp;Garbage Collector Names</a></h3>
                     <div class="body conbody">
                        <p class="p"> Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="prefix-data"><a name="prefix-data" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#prefix-data" name="prefix-data" shape="rect">3.16.&nbsp;Prefix Data</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="attribute-groups"><a name="attribute-groups" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#attribute-groups" name="attribute-groups" shape="rect">3.17.&nbsp;Attribute Groups</a></h3>
                     <div class="body conbody">
                        <p class="p">Fully supported.  The set of supported attributes is equal to the set of attributes
                           accepted where the attribute group is used.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="function-attributes"><a name="function-attributes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#function-attributes" name="function-attributes" shape="rect">3.18.&nbsp;Function Attributes</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported:</p><a name="function-attributes__ul_ed5_hgb_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="function-attributes__ul_ed5_hgb_n3">
                           <li class="li"><samp class="ph codeph">alwaysinline</samp></li>
                           <li class="li"><samp class="ph codeph">cold</samp></li>
                           <li class="li"><samp class="ph codeph">inlinehint</samp></li>
                           <li class="li"><samp class="ph codeph">minsize</samp></li>
                           <li class="li"><samp class="ph codeph">noduplicate</samp></li>
                           <li class="li"><samp class="ph codeph">noinline</samp></li>
                           <li class="li"><samp class="ph codeph">noreturn</samp></li>
                           <li class="li"><samp class="ph codeph">nounwind</samp></li>
                           <li class="li"><samp class="ph codeph">optnone</samp></li>
                           <li class="li"><samp class="ph codeph">optsize</samp></li>
                           <li class="li"><samp class="ph codeph">readnone</samp></li>
                           <li class="li"><samp class="ph codeph">readonly</samp></li>
                        </ul>
                        <p class="p"> Accepted and ignored:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">allocsize</samp></li>
                           <li class="li"><samp class="ph codeph">argmemonly</samp></li>
                           <li class="li"><samp class="ph codeph">inaccessiblememonly</samp></li>
                           <li class="li"><samp class="ph codeph">inaccessiblemem_or_argmemonly</samp></li>
                           <li class="li"><samp class="ph codeph">norecurse</samp></li>
                           <li class="li"><samp class="ph codeph">speculatable</samp></li>
                           <li class="li"><samp class="ph codeph">writeonly</samp></li>
                        </ul>
                        <p class="p">Not Supported:</p><a name="function-attributes__ul_fvq_sgb_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="function-attributes__ul_fvq_sgb_n3">
                           <li class="li"><samp class="ph codeph">alignstack</samp></li>
                           <li class="li"><samp class="ph codeph">builtin</samp></li>
                           <li class="li"><samp class="ph codeph">nonlazybind</samp></li>
                           <li class="li"><samp class="ph codeph">naked</samp></li>
                           <li class="li"><samp class="ph codeph">nobuiltin</samp></li>
                           <li class="li"><samp class="ph codeph">noimplicitfloat</samp></li>
                           <li class="li"><samp class="ph codeph">noredzone</samp></li>
                           <li class="li"><samp class="ph codeph">probe-stack</samp></li>
                           <li class="li"><samp class="ph codeph">returns_twice</samp></li>
                           <li class="li"><samp class="ph codeph">sanitize_address</samp></li>
                           <li class="li"><samp class="ph codeph">sanitize_memory</samp></li>
                           <li class="li"><samp class="ph codeph">sanitize_thread</samp></li>
                           <li class="li"><samp class="ph codeph">ssp</samp></li>
                           <li class="li"><samp class="ph codeph">sspreq</samp></li>
                           <li class="li"><samp class="ph codeph">sspstrong</samp></li>
                           <li class="li"><samp class="ph codeph">stack-probe-size</samp></li>
                           <li class="li"><samp class="ph codeph">uwtable</samp></li>
                           <li class="li"><samp class="ph codeph">convergent</samp></li>
                           <li class="li"><samp class="ph codeph">jumptable</samp></li>
                           <li class="li"><samp class="ph codeph">safestack</samp></li>
                           <li class="li"><samp class="ph codeph">"thunk"</samp></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="global-attributes"><a name="global-attributes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#global-attributes" name="global-attributes" shape="rect">3.19.&nbsp;Global Attributes</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="unique_1842295346"><a name="unique_1842295346" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#unique_1842295346" name="unique_1842295346" shape="rect">Operand Bundles</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="module-level-inline-assembly"><a name="module-level-inline-assembly" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#module-level-inline-assembly" name="module-level-inline-assembly" shape="rect">3.21.&nbsp;Module-Level Inline Assembly</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="data-layout"><a name="data-layout" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#data-layout" name="data-layout" shape="rect">3.22.&nbsp;Data Layout</a></h3>
                     <div class="body conbody">
                        <p class="p">Only the following data layouts are supported,</p><a name="data-layout__ul_p3j_ps1_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="data-layout__ul_p3j_ps1_n3">
                           <li class="li">32-bit
                              <p class="p"><samp class="ph codeph">e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64
                                    </samp></p>
                              <p class="p"></p>
                              <p class="p"></p>
                           </li>
                           <li class="li">64-bit
                              <p class="p"><samp class="ph codeph">e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64
                                    </samp></p>
                              <p class="p"></p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="target-triple"><a name="target-triple" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#target-triple" name="target-triple" shape="rect">3.23.&nbsp;Target Triple</a></h3>
                     <div class="body conbody">
                        <p class="p">Only the following target triples are supported,
                           where * can be any name:
                        </p>
                        <ul class="ul">
                           <li class="li">
                              <p class="p">32-bit: <samp class="ph codeph">nvptx-*-cuda</samp></p>
                           </li>
                           <li class="li">
                              <p class="p">64-bit: <samp class="ph codeph">nvptx64-*-cuda</samp></p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="pointer-aliasing-rules"><a name="pointer-aliasing-rules" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#pointer-aliasing-rules" name="pointer-aliasing-rules" shape="rect">3.24.&nbsp;Pointer Aliasing Rules</a></h3>
                     <div class="body conbody">
                        <p class="p">Fully supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="volatile-memory-access"><a name="volatile-memory-access" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#volatile-memory-access" name="volatile-memory-access" shape="rect">3.25.&nbsp;Volatile Memory Access</a></h3>
                     <div class="body conbody">
                        <p class="p">Fully supported. Note that for code generation: <samp class="ph codeph">ld.volatile</samp> and
                           <samp class="ph codeph">st.volatile</samp> will be generated.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="memory-model-for-concurrent-operations"><a name="memory-model-for-concurrent-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#memory-model-for-concurrent-operations" name="memory-model-for-concurrent-operations" shape="rect">3.26.&nbsp;Memory Model for Concurrent Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Not applicable. Threads in an NVVM IR program must use atomic operations or
                           barrier synchronization to communicate.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="atomic-memory-ordering-constraints"><a name="atomic-memory-ordering-constraints" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#atomic-memory-ordering-constraints" name="atomic-memory-ordering-constraints" shape="rect">3.27.&nbsp;Atomic Memory Ordering Constraints</a></h3>
                     <div class="body conbody">
                        <p class="p">Atomic loads and stores are not supported. Other atomic operations on other than 32-bit
                           or 64-bit operands are not supported.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="unique_1546694063"><a name="unique_1546694063" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#unique_1546694063" name="unique_1546694063" shape="rect">Fast Math Flags</a></h3>
                     <div class="body conbody">
                        <p class="p">Accepted and ignored.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="uselist-order-directives"><a name="uselist-order-directives" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#uselist-order-directives" name="uselist-order-directives" shape="rect">3.29.&nbsp;Use-list Order Directives</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="type-system"><a name="type-system" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#type-system" name="type-system" shape="rect">4.&nbsp;Type System</a></h2>
                  <div class="body conbody">
                     <p class="p">Fully supported, except for the following:</p><a name="type-system__ul_all_y3b_n3" shape="rect">
                        <!-- --></a><ul class="ul" id="type-system__ul_all_y3b_n3">
                        <li class="li">Floating point types <samp class="ph codeph">half</samp>, <samp class="ph codeph">fp128</samp>, <samp class="ph codeph">x86_fp80</samp>,
                           <samp class="ph codeph">fp128</samp> and <samp class="ph codeph">ppc_fp128</samp> are not supported.
                        </li>
                        <li class="li">The <samp class="ph codeph">x86_mmx</samp> type is not supported.
                        </li>
                        <li class="li">The <samp class="ph codeph">token</samp> type is not supported.
                        </li>
                        <li class="li">The <samp class="ph codeph">non-integral pointer</samp> type is not supported.
                        </li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="constants"><a name="constants" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#constants" name="constants" shape="rect">5.&nbsp;Constants</a></h2>
                  <div class="body conbody">
                     <p class="p">Fully supported, except for the following:</p>
                     <ul class="ul">
                        <li class="li"><samp class="ph codeph">Token constants</samp> is not supported.
                        </li>
                        <li class="li"><samp class="ph codeph">blockaddress(@function, %block)</samp> is not supported.
                        </li>
                        <li class="li">For a constant expression that is used as the initializer of a global variable
                           	    <samp class="ph codeph">@g1</samp>, if the constant expression contains a global identifier
                           	    <samp class="ph codeph">@g2</samp>, then the constant expression is supported if  
                           	    it can be reduced to the form of <samp class="ph codeph">bitcast+offset</samp>, where offset 
                           	    is an integer number (including <samp class="ph codeph">0</samp>) 
                        </li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="other-values"><a name="other-values" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#other-values" name="other-values" shape="rect">6.&nbsp;Other Values</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="inline-assembler-expressions"><a name="inline-assembler-expressions" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#inline-assembler-expressions" name="inline-assembler-expressions" shape="rect">6.1.&nbsp;Inline Assembler Expressions</a></h3>
                     <div class="body conbody">
                        <div class="p">Inline assembler of PTX instructions is supported, with the following supported
                           constraints: 
                           
                           <div class="tablenoborder"><a name="inline-assembler-expressions__table_kcr_tpb_n3" shape="rect">
                                 <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="inline-assembler-expressions__table_kcr_tpb_n3" class="table" frame="border" border="1" rules="all">
                                 <thead class="thead" align="left">
                                    <tr class="row">
                                       <th class="entry" valign="top" width="50%" id="d54e1272" rowspan="1" colspan="1">Constraint</th>
                                       <th class="entry" valign="top" width="50%" id="d54e1275" rowspan="1" colspan="1">Type</th>
                                    </tr>
                                 </thead>
                                 <tbody class="tbody">
                                    <tr class="row">
                                       <td class="entry" valign="top" width="50%" headers="d54e1272" rowspan="1" colspan="1">c</td>
                                       <td class="entry" valign="top" width="50%" headers="d54e1275" rowspan="1" colspan="1">i8</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" width="50%" headers="d54e1272" rowspan="1" colspan="1">h</td>
                                       <td class="entry" valign="top" width="50%" headers="d54e1275" rowspan="1" colspan="1">i16</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" width="50%" headers="d54e1272" rowspan="1" colspan="1">r</td>
                                       <td class="entry" valign="top" width="50%" headers="d54e1275" rowspan="1" colspan="1">i32</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" width="50%" headers="d54e1272" rowspan="1" colspan="1">l</td>
                                       <td class="entry" valign="top" width="50%" headers="d54e1275" rowspan="1" colspan="1">i64</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" width="50%" headers="d54e1272" rowspan="1" colspan="1">f</td>
                                       <td class="entry" valign="top" width="50%" headers="d54e1275" rowspan="1" colspan="1">f32</td>
                                    </tr>
                                    <tr class="row">
                                       <td class="entry" valign="top" width="50%" headers="d54e1272" rowspan="1" colspan="1">d</td>
                                       <td class="entry" valign="top" width="50%" headers="d54e1275" rowspan="1" colspan="1">f64</td>
                                    </tr>
                                 </tbody>
                              </table>
                           </div>
                        </div>
                        <p class="p">The inline asm metadata <samp class="ph codeph">!srcloc</samp> is accepted and ignored.
                        </p>
                        <p class="p">The inline asm dialect <samp class="ph codeph">inteldialect</samp> is not supported.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="metadata-nodes-and-metadata-strings"><a name="metadata-nodes-and-metadata-strings" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#metadata-nodes-and-metadata-strings" name="metadata-nodes-and-metadata-strings" shape="rect">6.2.&nbsp;Metadata Nodes and Metadata Strings</a></h3>
                     <div class="body conbody">
                        <p class="p">Fully supported. The <samp class="ph codeph">distinct</samp> keyword
                           is accepted and ignored.
                           The specialized metadata nodes are not supported.
                        </p>
                        <p class="p">The following metadata are understood by the NVVM compiler:</p>
                        <ul class="ul">
                           <li class="li">Debug information (using LLVM 3.4 debug metadata)</li>
                           <li class="li"><samp class="ph codeph">llvm.loop.unroll.count</samp></li>
                           <li class="li"><samp class="ph codeph">llvm.loop.unroll.disable</samp></li>
                           <li class="li"><samp class="ph codeph">llvm.loop.unroll.full</samp></li>
                           <li class="li"><samp class="ph codeph">pragma unroll</samp><p class="p">Attached to the branch instruction corresponding to
                                 the backedge of a loop.
                              </p>
                              <p class="p">The kind of the MDNode is <samp class="ph codeph">pragma</samp>.
                                 The first operand is a metadata string <samp class="ph codeph">!"unroll"</samp> and the second
                                 operand is an <samp class="ph codeph">i32</samp> value which specifies the unroll factor. For
                                 example,
                                 
                              </p><pre xml:space="preserve">br i1 %cond, label %BR1, label %BR2, !pragma !42
!42 = metadata !{metadata !"unroll", i32 4}</pre></li>
                           <li class="li"><samp class="ph codeph">callalign</samp>
                              (see <a class="xref" href="index.html#rules-and-restrictions" shape="rect">Rules and Restrictions</a> for <dfn class="term">Calling Conventions</dfn>)
                           </li>
                        </ul>
                        <p class="p">Module flags metadata (<samp class="ph codeph">llvm.module.flags</samp>) is supported and verified, but
                           the metadata values will be ignored.
                        </p>
                        <p class="p">All other metadata is accepted and ignored.</p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="intrinsic-global-variables"><a name="intrinsic-global-variables" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#intrinsic-global-variables" name="intrinsic-global-variables" shape="rect">7.&nbsp;Intrinsic Global Variables</a></h2>
                  <div class="body conbody">
                     <ul class="ul">
                        <li class="li">The <samp class="ph codeph">llvm.used</samp> global variable is supported.
                        </li>
                        <li class="li">The <samp class="ph codeph">llvm.compiler.used</samp> global variable is supported
                        </li>
                        <li class="li">The <samp class="ph codeph">llvm.global_ctors</samp> global variable is not supported
                        </li>
                        <li class="li">The <samp class="ph codeph">llvm.global_dtors</samp> global variable is not supported
                        </li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="instructions"><a name="instructions" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#instructions" name="instructions" shape="rect">8.&nbsp;Instructions</a></h2>
                  <div class="body conbody">
                     <p class="p"></p>
                  </div>
                  <div class="topic concept nested1" id="terminator-instructions"><a name="terminator-instructions" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#terminator-instructions" name="terminator-instructions" shape="rect">8.1.&nbsp;Terminator Instructions</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: </p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">ret</samp></li>
                           <li class="li"><samp class="ph codeph">br</samp></li>
                           <li class="li"><samp class="ph codeph">switch</samp></li>
                           <li class="li"><samp class="ph codeph">unreachable</samp></li>
                        </ul>
                        <p class="p">Unsupported: </p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">indirectbr</samp></li>
                           <li class="li"><samp class="ph codeph">invoke</samp></li>
                           <li class="li"><samp class="ph codeph">resume</samp></li>
                           <li class="li"><samp class="ph codeph">catchswitch</samp></li>
                           <li class="li"><samp class="ph codeph">catchret</samp></li>
                           <li class="li"><samp class="ph codeph">cleanupret</samp></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="binary-operations"><a name="binary-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#binary-operations" name="binary-operations" shape="rect">8.2.&nbsp;Binary Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: </p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">add</samp></li>
                           <li class="li"><samp class="ph codeph">fadd</samp></li>
                           <li class="li"><samp class="ph codeph">sub</samp></li>
                           <li class="li"><samp class="ph codeph">fsub</samp></li>
                           <li class="li"><samp class="ph codeph">mul</samp></li>
                           <li class="li"><samp class="ph codeph">fmul</samp></li>
                           <li class="li"><samp class="ph codeph">udiv</samp></li>
                           <li class="li"><samp class="ph codeph">sdiv</samp></li>
                           <li class="li"><samp class="ph codeph">fdiv</samp></li>
                           <li class="li"><samp class="ph codeph">urem</samp></li>
                           <li class="li"><samp class="ph codeph">srem</samp></li>
                           <li class="li"><samp class="ph codeph">frem</samp></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="bitwise-binary-operations"><a name="bitwise-binary-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#bitwise-binary-operations" name="bitwise-binary-operations" shape="rect">8.3.&nbsp;Bitwise Binary Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: </p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">shl</samp></li>
                           <li class="li"><samp class="ph codeph">lshr</samp></li>
                           <li class="li"><samp class="ph codeph">ashr</samp></li>
                           <li class="li"><samp class="ph codeph">and</samp></li>
                           <li class="li"><samp class="ph codeph">or</samp></li>
                           <li class="li"><samp class="ph codeph">xor</samp></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="vector-operations"><a name="vector-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#vector-operations" name="vector-operations" shape="rect">8.4.&nbsp;Vector Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: </p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">extractelement</samp></li>
                           <li class="li"><samp class="ph codeph">insertelement</samp></li>
                           <li class="li"><samp class="ph codeph">shufflevector</samp></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="aggregate-operations"><a name="aggregate-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#aggregate-operations" name="aggregate-operations" shape="rect">8.5.&nbsp;Aggregate Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: </p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">extractvalue</samp></li>
                           <li class="li"><samp class="ph codeph">insertvalue</samp></li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="memory-access-and-addressing-operations"><a name="memory-access-and-addressing-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#memory-access-and-addressing-operations" name="memory-access-and-addressing-operations" shape="rect">8.6.&nbsp;Memory Access and Addressing Operations</a></h3>
                     <div class="body conbody">
                        <p class="p"></p>
                     </div>
                     <div class="topic concept nested2" id="alloca-instruction"><a name="alloca-instruction" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#alloca-instruction" name="alloca-instruction" shape="rect">8.6.1.&nbsp;alloca Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p">The <samp class="ph codeph">alloca</samp> instruction returns a generic pointer to the local address
                              space. The number of elements, if specified, must be a compile-time constant, otherwise 
                              it is not supported. The <samp class="ph codeph">inalloca</samp>
                              attribute is not supported.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="load-instruction"><a name="load-instruction" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#load-instruction" name="load-instruction" shape="rect">8.6.2.&nbsp;load Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p"><samp class="ph codeph">load atomic</samp> is not supported.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="store-instruction"><a name="store-instruction" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#store-instruction" name="store-instruction" shape="rect">8.6.3.&nbsp;store Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p"><samp class="ph codeph">store atomic</samp> is not supported.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="fence-instruction"><a name="fence-instruction" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#fence-instruction" name="fence-instruction" shape="rect">8.6.4.&nbsp;fence Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p">Not supported. Use NVVM intrinsic functions instead. </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cmpxchg-instruction"><a name="cmpxchg-instruction" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cmpxchg-instruction" name="cmpxchg-instruction" shape="rect">8.6.5.&nbsp;cmpxchg Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p">Supported for <samp class="ph codeph">i32</samp> and <samp class="ph codeph">i64</samp> types, with the following
                              restrictions:
                           </p>
                           <ul class="ul">
                              <li class="li">
                                 <p class="p">The pointer must be either a global pointer, a shared pointer, or a generic
                                    pointer that points to either the global address space or the shared address
                                    space. 
                                 </p>
                              </li>
                              <li class="li">
                                 <p class="p">The <samp class="ph codeph">weak</samp> marker and the <samp class="ph codeph">failure
                                       ordering</samp> are accepted and ignored. 
                                 </p>
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="atomicrmw-instruction"><a name="atomicrmw-instruction" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#atomicrmw-instruction" name="atomicrmw-instruction" shape="rect">8.6.6.&nbsp;atomicrmw Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p"><samp class="ph codeph">nand</samp> is not supported. The other keywords are supported for
                              <samp class="ph codeph">i32</samp> and <samp class="ph codeph">i64</samp> types, with the following
                              restrictions.
                           </p>
                           <ul class="ul">
                              <li class="li">The pointer must be either a global pointer, a shared pointer, or a generic pointer
                                 that points to either the <samp class="ph codeph">global</samp> address space or the
                                 <samp class="ph codeph">shared</samp> address space.
                              </li>
                           </ul>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="unique_894469802"><a name="unique_894469802" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#unique_894469802" name="unique_894469802" shape="rect">getelementptr Instruction</a></h3>
                        <div class="body conbody">
                           <p class="p">Fully supported.</p>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="conversion-operations"><a name="conversion-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#conversion-operations" name="conversion-operations" shape="rect">8.7.&nbsp;Conversion Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">trunc .. to</samp></li>
                           <li class="li"><samp class="ph codeph">zext .. to</samp></li>
                           <li class="li"><samp class="ph codeph">sext .. to</samp></li>
                           <li class="li"><samp class="ph codeph">fptrunc .. to</samp></li>
                           <li class="li"><samp class="ph codeph">fpext .. to</samp></li>
                           <li class="li"><samp class="ph codeph">fptoui .. to</samp></li>
                           <li class="li"><samp class="ph codeph">fptosi .. to</samp></li>
                           <li class="li"><samp class="ph codeph">uitofp .. to</samp></li>
                           <li class="li"><samp class="ph codeph">sitofp .. to</samp></li>
                           <li class="li"><samp class="ph codeph">ptrtoint .. to</samp></li>
                           <li class="li"><samp class="ph codeph">inttoptr .. to</samp></li>
                           <li class="li"><samp class="ph codeph">addrspacecast .. to</samp></li>
                           <li class="li"><samp class="ph codeph">bitcast .. to</samp><p class="p">See <a class="xref" href="index.html#conversion" shape="rect">Conversion</a> for a special use case of
                                 <samp class="ph codeph">bitcast</samp>.
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="other-operations"><a name="other-operations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#other-operations" name="other-operations" shape="rect">8.8.&nbsp;Other Operations</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">icmp</samp></li>
                           <li class="li"><samp class="ph codeph">fcmp</samp></li>
                           <li class="li"><samp class="ph codeph">phi</samp></li>
                           <li class="li"><samp class="ph codeph">select</samp></li>
                           <li class="li"><samp class="ph codeph">call</samp> (The <samp class="ph codeph">musttail</samp> and <samp class="ph codeph">notail</samp> markers are not supported. Optional fast math flags are accepted and ignored. See <a class="xref" href="index.html#calling-conventions" shape="rect">Calling Conventions</a> for other rules and
                              restrictions.)
                           </li>
                        </ul>
                        <p class="p">Unsupported:</p>
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">va_arg</samp></li>
                           <li class="li"><samp class="ph codeph">landingpad</samp></li>
                           <li class="li"><samp class="ph codeph">catchpad</samp></li>
                           <li class="li"><samp class="ph codeph">cleanuppad</samp></li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="intrinsic-functions"><a name="intrinsic-functions" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#intrinsic-functions" name="intrinsic-functions" shape="rect">9.&nbsp;Intrinsic Functions</a></h2>
                  <div class="body conbody">
                     <p class="p"></p>
                  </div>
                  <div class="topic concept nested1" id="variable-argument-handling-intrinsics"><a name="variable-argument-handling-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#variable-argument-handling-intrinsics" name="variable-argument-handling-intrinsics" shape="rect">9.1.&nbsp;Variable Argument Handling Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="accurate-garbage-collection-intrinics"><a name="accurate-garbage-collection-intrinics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#accurate-garbage-collection-intrinics" name="accurate-garbage-collection-intrinics" shape="rect">9.2.&nbsp;Accurate Garbage Collection Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="code-generator-intrinics"><a name="code-generator-intrinics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#code-generator-intrinics" name="code-generator-intrinics" shape="rect">9.3.&nbsp;Code Generator Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="standard-c-library-intrinics"><a name="standard-c-library-intrinics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#standard-c-library-intrinics" name="standard-c-library-intrinics" shape="rect">9.4.&nbsp;Standard C Library Intrinsics</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">llvm.memcpy</samp><p class="p">Supported. Note that the constant address space
                                 cannot be used as the destination since it is read-only. 
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.memmove </samp><p class="p">Supported. Note that the constant address space
                                 cannot be used since it is read-only. 
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.memset</samp><p class="p">Supported. Note that the constant address space
                                 cannot be used since it is read-only. 
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.sqrt</samp><p class="p">Supported for float/double and vector of float/double.
                                 Mapped to PTX <samp class="ph codeph">sqrt.rn.f32</samp> and
                                 <samp class="ph codeph">sqrt.rn.f64</samp>.
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.powi</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.sin</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.cos</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.pow</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.exp</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.exp2</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.log</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.log10</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.log2</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.fma</samp><p class="p">Supported for float/double and vector of float/double.
                                 Mapped to PTX <samp class="ph codeph">fma.rn.f32</samp> and <samp class="ph codeph">fma.rn.f64</samp></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.fabs</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.copysign</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.floor</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.ceil</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.trunc</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.rint</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.nearbyint</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.round</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.minnum</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.maxnum</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="bit-manipulations-intrinics"><a name="bit-manipulations-intrinics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#bit-manipulations-intrinics" name="bit-manipulations-intrinics" shape="rect">Bit Manipulations Intrinsics</a></h3>
                     <div class="body conbody"><a name="bit-manipulations-intrinics__ul_f2n_1th_n3" shape="rect">
                           <!-- --></a><ul class="ul" id="bit-manipulations-intrinics__ul_f2n_1th_n3">
                           <li class="li"><samp class="ph codeph">llvm.bitreverse</samp><p class="p">Not supported. </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.bswap</samp><p class="p">Supported for <samp class="ph codeph">i16</samp>,
                                 <samp class="ph codeph">i32</samp>, and <samp class="ph codeph">i64</samp>. 
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.ctpop</samp><p class="p">Supported for <samp class="ph codeph">i8</samp>,
                                 <samp class="ph codeph">i16</samp>, <samp class="ph codeph">i32</samp>, <samp class="ph codeph">i64</samp>, and
                                 vectors of these types.
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.ctlz</samp><p class="p">Supported for <samp class="ph codeph">i8</samp>,
                                 <samp class="ph codeph">i16</samp>, <samp class="ph codeph">i32</samp>, <samp class="ph codeph">i64</samp>, and vectors of
                                 these types. 
                              </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.cttz</samp><p class="p">Supported for <samp class="ph codeph">i8</samp>,
                                 <samp class="ph codeph">i16</samp>, <samp class="ph codeph">i32</samp>, <samp class="ph codeph">i64</samp>, and vectors of
                                 these types.
                              </p>
                              <p class="p"></p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="unique_1886512331"><a name="unique_1886512331" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#unique_1886512331" name="unique_1886512331" shape="rect">Specialised Arithmetic Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: <samp class="ph codeph">llvm.fmuladd</samp></p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="half-precision-floating-point-intrinsics"><a name="half-precision-floating-point-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#half-precision-floating-point-intrinsics" name="half-precision-floating-point-intrinsics" shape="rect">9.7.&nbsp;Half Precision Floating Point Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: <samp class="ph codeph">llvm.convert.to.fp16.f32</samp>,
                           <samp class="ph codeph">llvm.convert.from.fp16.f32</samp>, and the LLVM 3.4
                           versions <samp class="ph codeph">llvm.convert.to.fp16</samp>, <samp class="ph codeph">llvm.convert.from.fp16</samp></p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="debugger-intrinsics"><a name="debugger-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#debugger-intrinsics" name="debugger-intrinsics" shape="rect">9.8.&nbsp;Debugger Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: LLVM 3.4 versions of <samp class="ph codeph">llvm.dbg.declare</samp> and <samp class="ph codeph">llvm.dbg.value</samp>.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="exception-handling-intrinsics"><a name="exception-handling-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#exception-handling-intrinsics" name="exception-handling-intrinsics" shape="rect">9.9.&nbsp;Exception Handling Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="trampoline-intrinsics"><a name="trampoline-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trampoline-intrinsics" name="trampoline-intrinsics" shape="rect">9.10.&nbsp;Trampoline Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="unique_1741578225"><a name="unique_1741578225" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#unique_1741578225" name="unique_1741578225" shape="rect">Masked Vector Load and Store Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="vector-reduce-intrinsics"><a name="vector-reduce-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#vector-reduce-intrinsics" name="vector-reduce-intrinsics" shape="rect">9.12.&nbsp;Vector Reduction Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="constrained-float-intrinsics"><a name="constrained-float-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#constrained-float-intrinsics" name="constrained-float-intrinsics" shape="rect">9.13.&nbsp;Constrained Floating Point Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="constrained-libm-intrinsics"><a name="constrained-libm-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#constrained-libm-intrinsics" name="constrained-libm-intrinsics" shape="rect">9.14.&nbsp;Constrained libm-equivalent Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="unique_420250914"><a name="unique_420250914" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#unique_420250914" name="unique_420250914" shape="rect">Masked Vector Gather and Scatter Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="memory-use-markers"><a name="memory-use-markers" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#memory-use-markers" name="memory-use-markers" shape="rect">9.16.&nbsp;Memory Use Markers</a></h3>
                     <div class="body conbody">
                        <p class="p">Supported: <samp class="ph codeph">llvm.lifetime.start</samp>, <samp class="ph codeph">llvm.lifetime.end</samp>,
                           <samp class="ph codeph">llvm.invariant.start</samp>, and <samp class="ph codeph">llvm.invariant.end</samp>.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="general-intrinsics"><a name="general-intrinsics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#general-intrinsics" name="general-intrinsics" shape="rect">9.17.&nbsp;General Intrinsics</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li">
                              <p class="p"><samp class="ph codeph">llvm.var.annotation</samp></p>
                              <p class="p">Accepted and ignored.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.ptr.annotation</samp><p class="p">Accepted and ignored.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.annotation</samp><p class="p">Accepted and ignored.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.debugtrap</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.stackguard</samp><p class="p">Not supported. </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.stackprotector</samp><p class="p">Not supported. </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.stackprotectorcheck</samp><p class="p">Not supported. </p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.objectsize</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.expect</samp><p class="p">Supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.assume</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.type.test</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.checked.load</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.bitset.test</samp><p class="p">Not supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.donothing</samp><p class="p">Supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.experimental.deoptimize</samp><p class="p">Not Supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.experimental.guard</samp><p class="p">Not Supported.</p>
                              <p class="p"></p>
                           </li>
                           <li class="li"><samp class="ph codeph">llvm.load.relative</samp><p class="p">Not Supported.</p>
                              <p class="p"></p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="elementwise-atomic-memory-intrinics"><a name="elementwise-atomic-memory-intrinics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#elementwise-atomic-memory-intrinics" name="elementwise-atomic-memory-intrinics" shape="rect">Element Wise Atomic Memory Intrinsics</a></h3>
                     <div class="body conbody">
                        <p class="p">Not supported.</p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="address-space"><a name="address-space" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#address-space" name="address-space" shape="rect">10.&nbsp;Address Space</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="address-spaces"><a name="address-spaces" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#address-spaces" name="address-spaces" shape="rect">10.1.&nbsp;Address Spaces</a></h3>
                     <div class="body conbody">
                        <p class="p"> NVVM IR has a set of predefined memory address spaces, whose semantics are similar to
                           those defined in CUDA C/C++, OpenCL C and PTX. Any address space not listed below is not
                           supported .
                        </p>
                        <div class="tablenoborder"><a name="address-spaces__table_ecr_txh_n3" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="address-spaces__table_ecr_txh_n3" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2719" rowspan="1" colspan="1">Name</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2722" rowspan="1" colspan="1">Address Space Number</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2725" rowspan="1" colspan="1">Semantics/Example</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1">code</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">0</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1">functions, code <a name="address-spaces__ul_ttv_dyh_n3" shape="rect">
                                          <!-- --></a><ul class="ul" id="address-spaces__ul_ttv_dyh_n3">
                                          <li class="li">CUDA C/C++ function</li>
                                          <li class="li">OpenCL C function</li>
                                       </ul>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1"> generic </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">0</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1"> Can only be used to qualify the pointee of a pointer <a name="address-spaces__ul_srj_nyh_n3" shape="rect">
                                          <!-- --></a><ul class="ul" id="address-spaces__ul_srj_nyh_n3">
                                          <li class="li"> Pointers in CUDA C/C++</li>
                                       </ul>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1">global</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">1</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1"><a name="address-spaces__ul_gpb_xyh_n3" shape="rect">
                                          <!-- --></a><ul class="ul" id="address-spaces__ul_gpb_xyh_n3">
                                          <li class="li">CUDA C/C++ __device__</li>
                                          <li class="li">OpenCL C global</li>
                                       </ul>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1">shared</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">3</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1"><a name="address-spaces__ul_ow2_zyh_n3" shape="rect">
                                          <!-- --></a><ul class="ul" id="address-spaces__ul_ow2_zyh_n3">
                                          <li class="li">CUDA C/C++ __shared__</li>
                                          <li class="li">OpenCL C local</li>
                                       </ul>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1">constant</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">4</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1"><a name="address-spaces__ul_w3z_1zh_n3" shape="rect">
                                          <!-- --></a><ul class="ul" id="address-spaces__ul_w3z_1zh_n3">
                                          <li class="li">CUDA C/C++ __constant__</li>
                                          <li class="li">OpenCL C constant</li>
                                       </ul>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1">local</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">5</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1"><a name="address-spaces__ul_tql_czh_n3" shape="rect">
                                          <!-- --></a><ul class="ul" id="address-spaces__ul_tql_czh_n3">
                                          <li class="li">CUDA C/C++ local</li>
                                          <li class="li">OpenCL C private</li>
                                       </ul>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2719" rowspan="1" colspan="1">&lt;reserved&gt;</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2722" rowspan="1" colspan="1">2, 101 and above</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2725" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">Each global variable, that is not an intrinsic global variable, can be declared to
                           reside in a specific non-zero address space, which can only be one of the following:
                           <samp class="ph codeph">global</samp>, <samp class="ph codeph">shared</samp> or <samp class="ph codeph">constant</samp>. 
                        </p>
                        <p class="p">If a non-intrinsic global variable is declared without any address space number or 
                           with the address space number 0, then this global variable resides in address space
                           <samp class="ph codeph">global</samp> and the pointer of this global variable holds a 
                           generic pointer value.
                        </p>
                        <p class="p">The predefined NVVM memory spaces are needed for the language front-ends to model the
                           memory spaces in the source languages. For example,
                        </p><pre xml:space="preserve">// CUDA C/C++
__constant__ int c;
__device__ int g;

; NVVM IR
@c = addrspace(4) global i32 0, align 4
@g = addrspace(1) global [2 x i32] zeroinitializer, align 4</pre><p class="p">Address space numbers 2 and 101 or higher are reserved for NVVM compiler internal use
                           only. No language front-end should generate code that uses these address spaces directly.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="generic-pointers-and-non-generic-pointers"><a name="generic-pointers-and-non-generic-pointers" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#generic-pointers-and-non-generic-pointers" name="generic-pointers-and-non-generic-pointers" shape="rect">10.2.&nbsp;Generic Pointers and Non-Generic Pointers</a></h3>
                     <div class="body conbody">
                        <p class="p"></p>
                     </div>
                     <div class="topic concept nested2" id="generic-pointers-vs-non-generic-pointers"><a name="generic-pointers-vs-non-generic-pointers" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#generic-pointers-vs-non-generic-pointers" name="generic-pointers-vs-non-generic-pointers" shape="rect">10.2.1.&nbsp;Generic Pointers vs. Non-generic Pointers</a></h3>
                        <div class="body conbody">
                           <p class="p">There are generic pointers and non-generic pointers in NVVM IR. A generic pointer is a
                              pointer that may point to memory in any address space. A non-generic pointer points to
                              memory in a specific address space. 
                           </p>
                           <p class="p">In NVVM IR, a generic pointer has a pointer type with the address space
                              <samp class="ph codeph">generic</samp>, while a non-generic pointer has a pointer type with a
                              non-generic address space. 
                           </p>
                           <p class="p">Note that the address space number for the generic address space is 0—the default in both
                              NVVM IR and LLVM IR. The address space number for the code address space is also 0.
                              Function pointers are qualified by address space <samp class="ph codeph">code</samp> 
                              (<samp class="ph codeph">addrspace(0)</samp>). 
                           </p>
                           <p class="p">Loads/stores via generic pointers are supported, as well as loads/stores via non-generic
                              pointers. Loads/stores via function pointers are not supported
                           </p><pre xml:space="preserve">
@a = addrspace(1) global i32 0, align 4 ; 'global' addrspace, @a holds a specific value 
@b = global i32 0, align 4              ; 'global' addrspace, @b holds a generic value
@c = addrspace(4) global i32 0, align 4 ; 'constant' addrspace, @c holds a specific value 

... = load i32 addrspace(1)* @a, align 4 ; Correct
... = load i32* @a, align 4              ; Wrong
... = load i32* @b, align 4              ; Correct
... = load i32 addrspace(1)* @b, align 4 ; Wrong
... = load i32 addrspace(4)* @c, align4  ; Correct
... = load i32* @c, align 4              ; Wrong</pre></div>
                     </div>
                     <div class="topic concept nested2" id="conversion"><a name="conversion" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#conversion" name="conversion" shape="rect">10.2.2.&nbsp;Conversion</a></h3>
                        <div class="body conbody">
                           <p class="p">The bit value of a generic pointer that points to a specific object may be different from
                              the bit value of a specific pointer that points to the same object.
                           </p>
                           <p class="p">The <samp class="ph codeph">addrspacecast</samp> IR instruction should be used to perform pointer casts
                              across address spaces (generic to non-generic or non-generic to generic). 
                              Casting a non-generic pointer to a different non-generic pointer is not supported.
                              Casting from a generic to a non-generic pointer is undefined if the generic pointer does not point
                              to an object in the target non-generic address space.
                           </p>
                           <p class="p"><samp class="ph codeph">inttoptr</samp> and <samp class="ph codeph">ptrtoint</samp> are supported.
                              <samp class="ph codeph">inttoptr</samp> and <samp class="ph codeph">ptrtoint</samp> are value preserving
                              instructions when the two operands are of the same size. In general, using
                              <samp class="ph codeph">ptrtoint</samp> and <samp class="ph codeph">inttoptr</samp> to implement an address space cast
                              is undefined.
                           </p>
                           <p class="p">The following intrinsics can be used to query if a generic pointer can be safely cast to
                              a specific non-generic address space:
                           </p>
                           <ul class="ul">
                              <li class="li"><samp class="ph codeph">i1 @llvm.nvvm.isspacep.const(i8*)</samp></li>
                              <li class="li"><samp class="ph codeph">i1 @llvm.nvvm.isspacep.global(i8*)</samp></li>
                              <li class="li"><samp class="ph codeph">i1 @llvm.nvvm.isspacep.local(i8*)</samp></li>
                              <li class="li"><samp class="ph codeph">i1 @llvm.nvvm.isspacep.shared(i8*)</samp></li>
                           </ul>
                           <p class="p"><samp class="ph codeph">bitcast</samp> on pointers is supported, though LLVM IR forbids
                              <samp class="ph codeph">bitcast</samp> from being used to change the address space of a pointer.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="no-aliasing-between-two-different-specific-address-spaces"><a name="no-aliasing-between-two-different-specific-address-spaces" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#no-aliasing-between-two-different-specific-address-spaces" name="no-aliasing-between-two-different-specific-address-spaces" shape="rect">10.2.3.&nbsp;No Aliasing between Two Different Specific Address Spaces</a></h3>
                        <div class="body conbody">
                           <p class="p">Two different specific address spaces do not overlap. NVVM compiler assumes two memory accesses via non-generic pointers that
                              point to different address spaces are not aliased.
                              
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="the-alloca-instruction"><a name="the-alloca-instruction" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#the-alloca-instruction" name="the-alloca-instruction" shape="rect">10.3.&nbsp;The alloca Instruction</a></h3>
                     <div class="body conbody">
                        <p class="p">The <samp class="ph codeph">alloca</samp> instruction returns a generic pointer that only points to
                           address space <samp class="ph codeph">local</samp>.
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="global-property-annotation-chapter-11"><a name="global-property-annotation-chapter-11" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#global-property-annotation-chapter-11" name="global-property-annotation-chapter-11" shape="rect">11.&nbsp;Global Property Annotation</a></h2>
                  <div class="body conbody">
                     <p class="p"></p>
                  </div>
                  <div class="topic concept nested1" id="overview"><a name="overview" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#overview" name="overview" shape="rect">11.1.&nbsp;Overview</a></h3>
                     <div class="body conbody">
                        <p class="p">NVVM uses Named Metadata to annotate IR objects with properties that are otherwise not representable in the IR. 
                           	The NVVM IR producers can use the Named Metadata to annotate the IR with properties, which the NVVM compiler can process.
                           	
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="representation-of-properties"><a name="representation-of-properties" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#representation-of-properties" name="representation-of-properties" shape="rect">11.2.&nbsp;Representation of Properties</a></h3>
                     <div class="body conbody">
                        <p class="p">For each translation unit (that is, per bitcode file), there is a named metadata called
                           <samp class="ph codeph">nvvm.annotations</samp>.
                        </p>
                        <p class="p">This named metadata contains a list of MDNodes.</p>
                        <p class="p">The first operand of each MDNode is an entity that the node is annotating using the
                           remaining operands.
                        </p>
                        <p class="p">Multiple MDNodes may provide annotations for the same entity, in which case their first
                           operands will be same.
                        </p>
                        <div class="p">The remaining operands of the MDNode are organized in order as &lt;property-name, value&gt;. 
                           <ul class="ul">
                              <li class="li">The property-name operand is MDString, while the value is <samp class="ph codeph">i32</samp>. 
                              </li>
                              <li class="li">Starting with the operand after the annotated entity, every alternate operand
                                 specifies a property.
                              </li>
                              <li class="li">The operand after a property is its value. 
                                 <p class="p">The following is an
                                    example.
                                 </p><pre xml:space="preserve">!nvvm.annotations = !{!12, !13}
  !12 = metadata !{void (i32, i32)* @_Z6kernelii, metadata !"kernel", i32 1}
  !13 = metadata !{void ()* @_Z7kernel2v, metadata !"kernel", i32 1, metadata !"maxntidx", i32 16}</pre></li>
                           </ul>
                        </div>
                        <p class="p">If two bitcode files are being linked and both have a named metadata
                           <samp class="ph codeph">nvvm.annotations</samp>, the linked file will have a single merged named
                           metadata. If both files define properties for the same entity foo , the linked file will
                           have two MDNodes defining properties for foo . It is illegal for the files to have
                           conflicting properties for the same entity.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-properties"><a name="supported-properties" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-properties" name="supported-properties" shape="rect">11.3.&nbsp;Supported Properties</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="supported-properties__table_usq_cp3_n3" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="supported-properties__table_usq_cp3_n3" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e3137" rowspan="1" colspan="1">Property Name</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e3140" rowspan="1" colspan="1">Annotated On</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e3143" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">maxntid{x, y, z}</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">kernel function</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Maximum expected CTA size from any launch.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">reqntid{x, y, z}</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">kernel function</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Minimum expected CTA size from any launch.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">minctasm</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">kernel function</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Hint/directive to the compiler/driver, asking it to put at least
                                       these many CTAs on an SM.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">kernel</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">function</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Signifies that this function is a kernel function.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">align</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">function</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Signifies that the value in low 16-bits of the 32-bit value contains
                                       alignment of n th parameter type if its alignment is not the natural
                                       alignment. n is specified by high 16-bits of the value. For return type,
                                       n is 0.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">texture</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">global variable</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Signifies that variable is a texture.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">surface</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">global variable</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Signifies that variable is a surface.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3137" rowspan="1" colspan="1"><samp class="ph codeph">managed</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3140" rowspan="1" colspan="1">global variable</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3143" rowspan="1" colspan="1">Signifies that variable is a UVM managed variable.</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="texture-and-surface"><a name="texture-and-surface" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#texture-and-surface" name="texture-and-surface" shape="rect">12.&nbsp;Texture and Surface</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="texture-variable-and-surface-variable"><a name="texture-variable-and-surface-variable" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#texture-variable-and-surface-variable" name="texture-variable-and-surface-variable" shape="rect">12.1.&nbsp;Texture Variable and Surface Variable</a></h3>
                     <div class="body conbody">
                        <p class="p">A texture or a surface variable can be declared/defined as a global variable of
                           <samp class="ph codeph">i64</samp> type with annotation <samp class="ph codeph">texture</samp> or
                           <samp class="ph codeph">surface</samp> in the <samp class="ph codeph">global</samp> address space. 
                        </p>
                        <p class="p">A texture or surface variable must have a name, which must follow identifier naming
                           conventions. 
                        </p>
                        <div class="p">It is illegal to store to or load from the address of a texture or surface variable. A
                           texture or a surface variable may only have the following uses:<a name="texture-variable-and-surface-variable__ul_or2_bq3_n3" shape="rect">
                              <!-- --></a><ul class="ul" id="texture-variable-and-surface-variable__ul_or2_bq3_n3">
                              <li class="li">In a metadata node</li>
                              <li class="li">As an intrinsic function argument as shown below</li>
                              <li class="li">In <samp class="ph codeph">llvm.used</samp> Global Variable
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="accessing-texture-memory-or-surface-memory"><a name="accessing-texture-memory-or-surface-memory" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#accessing-texture-memory-or-surface-memory" name="accessing-texture-memory-or-surface-memory" shape="rect">12.2.&nbsp;Accessing Texture Memory or Surface Memory</a></h3>
                     <div class="body conbody">
                        <p class="p">Texture memory and surface memory can be accessed using texture or surface handles. NVVM
                           provides the following intrinsic function to get a texture or surface handle from a
                           texture or surface variable.
                        </p><pre xml:space="preserve">
delcare i64 %llvm.nvvm.texsurf.handle.p1i64(metadata, i64 addrspace(1)*)
	</pre><p class="p">The first argument to the intrinsic is a metadata holding the texture or surface
                           variable. Such a metadata may hold only one texture or one surface variable. The second
                           argument to the intrinsic is the texture or surface variable itself. The intrinsic
                           returns a handle of <samp class="ph codeph">i64</samp> type.
                        </p>
                        <p class="p">The returned handle value from the intrinsic call can be used as an operand (with a
                           constraint of l) in a PTX inline asm to access the texture or surface memory.
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="nvvm-specific-intrinsic-functions"><a name="nvvm-specific-intrinsic-functions" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#nvvm-specific-intrinsic-functions" name="nvvm-specific-intrinsic-functions" shape="rect">13.&nbsp;NVVM Specific Intrinsic Functions</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="nvvm-intrin-atomic"><a name="nvvm-intrin-atomic" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-atomic" name="nvvm-intrin-atomic" shape="rect">13.1.&nbsp;Atomic</a></h3>
                     <div class="body conbody">
                        <p class="p">Besides the atomic instructions, the following extra atomic intrinsic functions are supported.</p>
                        <div class="p"><pre xml:space="preserve">
declare float @llvm.nvvm.atomic.load.add.f32.p0f32(float* address, float val)
declare float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* address, float val)
declare float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* address, float val)
declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* address, double val)
declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* address, double val)
declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* address, double val)
    </pre></div>
                        <p class="p">reads the single/double precision floating point value <samp class="ph codeph">old</samp> located at the address 
                           <samp class="ph codeph">address</samp>, computes <samp class="ph codeph">old+val</samp>, and stores the result back 
                           to memory at the same address. These operations are performed in one atomic transaction. 
                           The function returns <samp class="ph codeph">old</samp>.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.atomic.load.inc.32.p0i32(i32* address, i32 val)
declare i32 @llvm.nvvm.atomic.load.inc.32.p1i32(i32 addrspace(1)* address, i32 val)
declare i32 @llvm.nvvm.atomic.load.inc.32.p3i32(i32 addrspace(3)* address, i32 val)
    </pre></div>
                        <p class="p">reads the 32-bit word <samp class="ph codeph">old</samp> located at the address <samp class="ph codeph">address</samp>, 
                           computes <samp class="ph codeph">((old &gt;= val) ? 0 : (old+1))</samp>, and stores the result back to memory 
                           at the same address. These three operations are performed in one atomic transaction. 
                           The function returns <samp class="ph codeph">old</samp>.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.atomic.load.dec.32.p0i32(i32* address, i32 val)
declare i32 @llvm.nvvm.atomic.load.dec.32.p1i32(i32 addrspace(1)* address, i32 val)
declare i32 @llvm.nvvm.atomic.load.dec.32.p3i32(i32 addrspace(3)* address, i32 val)
    </pre></div>
                        <p class="p">reads the 32-bit word <samp class="ph codeph">old</samp> located at the address <samp class="ph codeph">address</samp>,
                           computes <samp class="ph codeph">(((old == 0) | (old &gt; val)) ? val : (old-1) )</samp>, and stores the 
                           result back to memory at the same address. These three operations are performed in 
                           one atomic transaction. The function returns <samp class="ph codeph">old</samp>.
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvvm-intrin-barrier"><a name="nvvm-intrin-barrier" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-barrier" name="nvvm-intrin-barrier" shape="rect">13.2.&nbsp;Barrier and Memory Fence</a></h3>
                     <div class="body conbody">
                        <div class="p"><pre xml:space="preserve">
declare void @llvm.nvvm.barrier0()
    </pre></div>
                        <p class="p">waits until all threads in the thread block have reached this point and all global and shared 
                           memory accesses made by these threads prior to <samp class="ph codeph">llvm.nvvm.barrier0()</samp> are 
                           visible to all threads in the block.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.barrier0.popc(i32)
    </pre></div>
                        <p class="p">is identical to <samp class="ph codeph">llvm.nvvm.barrier0()</samp> with the additional feature that it 
                           evaluates predicate for all threads of the block and returns the number of threads 
                           for which predicate evaluates to non-zero.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.barrier0.and(i32)
    </pre></div>
                        <p class="p">is identical to <samp class="ph codeph">llvm.nvvm.barrier0()</samp> with the additional feature that it 
                           evaluates predicate for all threads of the block and returns non-zero if and only if 
                           predicate evaluates to non-zero for all of them.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.barrier0.or(i32)
    </pre></div>
                        <p class="p">is identical to <samp class="ph codeph">llvm.nvvm.barrier0()</samp> with the additional feature that it 
                           evaluates predicate for all threads of the block and returns non-zero if and only if 
                           predicate evaluates to non-zero for any of them.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare void @llvm.nvvm.membar.cta()
    </pre></div>
                        <p class="p">is a memory fence at the thread block level.</p>
                        <div class="p"><pre xml:space="preserve">
declare void @llvm.nvvm.membar.gl()
    </pre></div>
                        <p class="p">is a memory fence at the device level.</p>
                        <div class="p"><pre xml:space="preserve">
declare void @llvm.nvvm.membar.sys()
    </pre></div>
                        <p class="p">is a memory fence at the system level.</p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvvm-intrin-addrsp"><a name="nvvm-intrin-addrsp" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-addrsp" name="nvvm-intrin-addrsp" shape="rect">13.3.&nbsp;Address space conversion</a></h3>
                     <div class="body conbody">
                        <div class="note attention"><span class="attentiontitle">Attention:</span> The NVVM address space conversion intrinsics are deprecated and may be removed
                           from future versions of the NVVM IR specification.  Please use the <samp class="ph codeph">addrspacecast</samp>
                           IR instruction instead.
                        </div>
                        <p class="p">The following intrinsic functions are provided to support converting pointers from specific address
                           spaces to the generic address space.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)*)
declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)*)
declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)*)

declare i16* @llvm.nvvm.ptr.global.to.gen.p0i16.p1i16(i16 addrspace(1)*)
declare i16* @llvm.nvvm.ptr.shared.to.gen.p0i16.p3i16(i16 addrspace(3)*)
declare i16* @llvm.nvvm.ptr.constant.to.gen.p0i16.p4i16(i16 addrspace(4)*)
declare i16* @llvm.nvvm.ptr.local.to.gen.p0i16.p5i16(i16 addrspace(5)*)

declare i32* @llvm.nvvm.ptr.global.to.gen.p0i32.p1i32(i32 addrspace(1)*)
declare i32* @llvm.nvvm.ptr.shared.to.gen.p0i32.p3i32(i32 addrspace(3)*)
declare i32* @llvm.nvvm.ptr.constant.to.gen.p0i32.p4i32(i32 addrspace(4)*)
declare i32* @llvm.nvvm.ptr.local.to.gen.p0i32.p5i32(i32 addrspace(5)*)

declare i64* @llvm.nvvm.ptr.global.to.gen.p0i64.p1i64(i64 addrspace(1)*)
declare i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)*)
declare i64* @llvm.nvvm.ptr.constant.to.gen.p0i64.p4i64(i64 addrspace(4)*)
declare i64* @llvm.nvvm.ptr.local.to.gen.p0i64.p5i64(i64 addrspace(5)*)

declare f32* @llvm.nvvm.ptr.global.to.gen.p0f32.p1f32(f32 addrspace(1)*)
declare f32* @llvm.nvvm.ptr.shared.to.gen.p0f32.p3f32(f32 addrspace(3)*)
declare f32* @llvm.nvvm.ptr.constant.to.gen.p0f32.p4f32(f32 addrspace(4)*)
declare f32* @llvm.nvvm.ptr.local.to.gen.p0f32.p5f32(f32 addrspace(5)*)

declare f64* @llvm.nvvm.ptr.global.to.gen.p0f64.p1f64(f64 addrspace(1)*)
declare f64* @llvm.nvvm.ptr.shared.to.gen.p0f64.p3f64(f64 addrspace(3)*)
declare f64* @llvm.nvvm.ptr.constant.to.gen.p0f64.p4f64(f64 addrspace(4)*)
declare f64* @llvm.nvvm.ptr.local.to.gen.p0f64.p5f64(f64 addrspace(5)*)
    </pre></div>
                        <p class="p">The following intrinsic functions are provided to support converting pointers from the generic
                           address spaces to specific address spaces. 
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i8 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8*)
declare i8 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8*)
declare i8 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8*)
declare i8 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8*)

declare i16 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i16.p0i16(i16*)
declare i16 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i16.p0i16(i16*)
declare i16 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i16.p0i16(i16*)
declare i16 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i16.p0i16(i16*)

declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)
declare i32 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i32.p0i32(i32*)
declare i32 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i32.p0i32(i32*)
declare i32 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i32.p0i32(i32*)

declare i64 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i64.p0i64(i64*)
declare i64 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i64.p0i64(i64*)
declare i64 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i64.p0i64(i64*)
declare i64 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i64.p0i64(i64*)

declare f32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1f32.p0f32(f32*)
declare f32 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3f32.p0f32(f32*)
declare f32 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4f32.p0f32(f32*)
declare f32 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5f32.p0f32(f32*)

declare f64 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1f64.p0f64(f64*)
declare f64 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3f64.p0f64(f64*)
declare f64 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4f64.p0f64(f64*)
declare f64 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5f64.p0f64(f64*)
    </pre></div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvvm-intrin-spreg"><a name="nvvm-intrin-spreg" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-spreg" name="nvvm-intrin-spreg" shape="rect">13.4.&nbsp;Special Registers</a></h3>
                     <div class="body conbody">
                        <p class="p">The following intrinsic functions are provided to support reading special PTX registers.</p>
                        <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
    </pre></div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvvm-intrin-texture-surface"><a name="nvvm-intrin-texture-surface" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-texture-surface" name="nvvm-intrin-texture-surface" shape="rect">13.5.&nbsp;Texture/Surface Access</a></h3>
                     <div class="body conbody">
                        <p class="p">The following intrinsic function is provided to convert a global texture/surface variable into
                           a texture/surface handle.
                        </p>
                        <div class="p"><pre xml:space="preserve">
declare i64 %llvm.nvvm.texsurf.handle.p1i64(metadata, i64 addrspace(1)*)
    </pre></div>
                        <p class="p">See <a class="xref" href="index.html#accessing-texture-memory-or-surface-memory" shape="rect">Accessing Texture Memory or Surface Memory</a> for details.
                        </p>
                        <p class="p">The following IR definitions apply to all intrinsics in this section:</p>
                        <div class="p"><pre xml:space="preserve">
type %float4 = { float, float, float, float }
type %long2 = { i64, i64 }
type %int4 = { i32, i32, i32, i32 }
type %int2 = { i32, i32 }
type %short4 = { i16, i16, i16, i16 }
type %short2 = { i16, i16 }
</pre></div>
                     </div>
                     <div class="topic concept nested2" id="nvvm-intrin-texture"><a name="nvvm-intrin-texture" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-texture" name="nvvm-intrin-texture" shape="rect">13.5.1.&nbsp;Texture Reads</a></h3>
                        <div class="body conbody">
                           <p class="p">Sampling a 1D texture:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %tex, i32 %x)
%float4 @llvm.nvvm.tex.unified.1d.v4f32.f32(i64 %tex, float %x)
%float4 @llvm.nvvm.tex.unified.1d.level.v4f32.f32(i64 %tex, float %x,
                                                  float %level)
%float4 @llvm.nvvm.tex.unified.1d.grad.v4f32.f32(i64 %tex, float %x,
                                                 float %dPdx,
                                                 float %dPdy)

%int4 @llvm.nvvm.tex.unified.1d.v4s32.s32(i64 %tex, i32 %x)
%int4 @llvm.nvvm.tex.unified.1d.v4s32.f32(i64 %tex, float %x)
%int4 @llvm.nvvm.tex.unified.1d.level.v4s32.f32(i64 %tex, float %x,
                                                float %level)
%int4 @llvm.nvvm.tex.unified.1d.grad.v4s32.f32(i64 %tex, float %x,
                                               float %dPdx,
                                               float %dPdy)

%int4 @llvm.nvvm.tex.unified.1d.v4u32.s32(i64 %tex, i32 %x)
%int4 @llvm.nvvm.tex.unified.1d.v4u32.f32(i64 %tex, float %x)
%int4 @llvm.nvvm.tex.unified.1d.level.v4u32.f32(i64 %tex, float %x,
                                                float %level)
%int4 @llvm.nvvm.tex.unified.1d.grad.v4u32.f32(i64 %tex, float %x,
                                               float %dPdx,
                                               float %dPdy)
    </pre></div>
                           <p class="p">Sampling a 1D texture array:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.1d.array.v4f32.s32(i64 %tex, i32 %idx, i32 %x)
%float4 @llvm.nvvm.tex.unified.1d.array.v4f32.f32(i64 %tex, i32 %idx, float %x)
%float4 @llvm.nvvm.tex.unified.1d.array.level.v4f32.f32(i64 %tex, i32 %idx,
                                                        float %x,
                                                        float %level)
%float4 @llvm.nvvm.tex.unified.1d.array.grad.v4f32.f32(i64 %tex, i32 %idx,
                                                       float %x,
                                                       float %dPdx,
                                                       float %dPdy)

%int4 @llvm.nvvm.tex.unified.1d.array.v4s32.s32(i64 %tex, i32 %idx, i32 %x)
%int4 @llvm.nvvm.tex.unified.1d.array.v4s32.f32(i64 %tex, i32 %idx, float %x)
%int4 @llvm.nvvm.tex.unified.1d.array.level.v4s32.f32(i64 %tex, i32 %idx,
                                                      float %x,
                                                      float %level)
%int4 @llvm.nvvm.tex.unified.1d.array.grad.v4s32.f32(i64 %tex, i32 %idx,
                                                     float %x,
                                                     float %dPdx,
                                                     float %dPdy)

%int4 @llvm.nvvm.tex.unified.1d.array.v4u32.s32(i64 %tex, i32 %idx, i32 %x)
%int4 @llvm.nvvm.tex.unified.1d.array.v4u32.f32(i64 %tex, i32 %idx, float %x)
%int4 @llvm.nvvm.tex.unified.1d.array.level.v4u32.f32(i64 %tex, i32 %idx,
                                                      float %x,
                                                      float %level)
%int4 @llvm.nvvm.tex.unified.1d.array.grad.v4u32.f32(i64 %tex, i32 %idx,
                                                     float %x,
                                                     float %dPdx,
                                                     float %dPdy)
    </pre></div>
                           <p class="p">Sampling a 2D texture:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %tex, i32 %x, i32 %y)
%float4 @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %tex, float %x, float %y)
%float4 @llvm.nvvm.tex.unified.2d.level.v4f32.f32(i64 %tex, float %x, float %y,
                                                  float %level)
%float4 @llvm.nvvm.tex.unified.2d.grad.v4f32.f32(i64 %tex, float %x, float %y,
                                                 float %dPdx_x, float %dPdx_y,
                                                 float %dPdy_x, float %dPdy_y)

%int4 @llvm.nvvm.tex.unified.2d.v4s32.s32(i64 %tex, i32 %x, i32 %y)
%int4 @llvm.nvvm.tex.unified.2d.v4s32.f32(i64 %tex, float %x, float %y,)
%int4 @llvm.nvvm.tex.unified.2d.level.v4s32.f32(i64 %tex, float %x, float %y,
                                                float %level)
%int4 @llvm.nvvm.tex.unified.2d.grad.v4s32.f32(i64 %tex, float %x, float %y,
                                               float %dPdx_x, float %dPdx_y,
                                               float %dPdy_x, float %dPdy_y)

%int4 @llvm.nvvm.tex.unified.2d.v4u32.s32(i64 %tex, i32 %x i32 %y)
%int4 @llvm.nvvm.tex.unified.2d.v4u32.f32(i64 %tex, float %x float %y)
%int4 @llvm.nvvm.tex.unified.2d.level.v4u32.f32(i64 %tex, float %x, float %y,
                                                float %level)
%int4 @llvm.nvvm.tex.unified.2d.grad.v4u32.f32(i64 %tex, float %x, float %y,
                                               float %dPdx_x, float %dPdx_y,
                                               float %dPdy_x, float %dPdy_y)
    </pre></div>
                           <p class="p">Sampling a 2D texture array:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.2d.array.v4f32.s32(i64 %tex, i32 %idx,
                                                  i32 %x, i32 %y)
%float4 @llvm.nvvm.tex.unified.2d.array.v4f32.f32(i64 %tex, i32 %idx,
                                                  float %x, float %y)
%float4 @llvm.nvvm.tex.unified.2d.array.level.v4f32.f32(i64 %tex, i32 %idx,
                                                        float %x, float %y,
                                                        float %level)
%float4 @llvm.nvvm.tex.unified.2d.array.grad.v4f32.f32(i64 %tex, i32 %idx,
                                                       float %x, float %y,
                                                       float %dPdx_x,
                                                       float %dPdx_y,
                                                       float %dPdy_x,
                                                       float %dPdy_y)

%int4 @llvm.nvvm.tex.unified.2d.array.v4s32.s32(i64 %tex, i32 %idx,
                                                i32 %x, i32 %y)
%int4 @llvm.nvvm.tex.unified.2d.array.v4s32.f32(i64 %tex, i32 %idx,
                                                float %x, float %y)
%int4 @llvm.nvvm.tex.unified.2d.array.level.v4s32.f32(i64 %tex, i32 %idx,
                                                      float %x, float %y,
                                                      float %level)
%int4 @llvm.nvvm.tex.unified.2d.array.grad.v4s32.f32(i64 %tex, i32 %idx,
                                                     float %x, float %y,
                                                     float %dPdx_x,
                                                     float %dPdx_y,
                                                     float %dPdy_x,
                                                     float %dPdy_y)

%int4 @llvm.nvvm.tex.unified.2d.array.v4u32.s32(i64 %tex, i32 %idx,
                                                i32 %x i32 %y)
%int4 @llvm.nvvm.tex.unified.2d.array.v4u32.f32(i64 %tex, i32 %idx,
                                                float %x float %y)
%int4 @llvm.nvvm.tex.unified.2d.array.level.v4u32.f32(i64 %tex, i32 %idx,
                                                      float %x, float %y,
                                                      float %level)
%int4 @llvm.nvvm.tex.unified.2d.array.grad.v4u32.f32(i64 %tex, i32 %idx,
                                                     float %x, float %y,
                                                     float %dPdx_x,
                                                     float %dPdx_y,
                                                     float %dPdy_x,
                                                     float %dPdy_y)
    </pre></div>
                           <p class="p">Sampling a 3D texture:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.3d.v4f32.s32(i64 %tex, i32 %x, i32 %y, i32 %z)
%float4 @llvm.nvvm.tex.unified.3d.v4f32.f32(i64 %tex, float %x, float %y,
                                            float %z)
%float4 @llvm.nvvm.tex.unified.3d.level.v4f32.f32(i64 %tex,float %x, float %y,
                                                  float %z, float %level)
%float4 @llvm.nvvm.tex.unified.3d.grad.v4f32.f32(i64 %tex, float %x, float %y,
                                                 float %z, float %dPdx_x,
                                                 float %dPdx_y, float %dPdx_z,
                                                 float %dPdy_x, float %dPdy_y,
                                                 float %dPdy_z)

%int4 @llvm.nvvm.tex.unified.3d.v4s32.s32(i64 %tex, i32 %x, i32 %y, i32 %z)
%int4 @llvm.nvvm.tex.unified.3d.v4s32.f32(i64 %tex, float %x, float %y,
                                          float %z)
%int4 @llvm.nvvm.tex.unified.3d.level.v4s32.f32(i64 %tex, float %x, float %y,
                                                float %z, float %level)
%int4 @llvm.nvvm.tex.unified.3d.grad.v4s32.f32(i64 %tex, float %x, float %y,
                                               float %z, float %dPdx_x,
                                               float %dPdx_y, float %dPdx_z,
                                               float %dPdy_x, float %dPdy_y,
                                               float %dPdy_z)

%int4 @llvm.nvvm.tex.unified.3d.v4u32.s32(i64 %tex, i32 %x i32 %y, i32 %z)
%int4 @llvm.nvvm.tex.unified.3d.v4u32.f32(i64 %tex, float %x, float %y,
                                          float %z)
%int4 @llvm.nvvm.tex.unified.3d.level.v4u32.f32(i64 %tex, float %x, float %y,
                                                float %z, float %level)
%int4 @llvm.nvvm.tex.unified.3d.grad.v4u32.f32(i64 %tex, float %x, float %y,
                                               float %z, float %dPdx_x,
                                               float %dPdx_y, float %dPdx_z,
                                               float %dPdy_x, float %dPdy_y,
                                               float %dPdy_z)
    </pre></div>
                           <p class="p">Sampling a cube texture:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.cube.v4f32.f32(i64 %tex, float %x, float %y,
                                              float %z)
%float4 @llvm.nvvm.tex.unified.cube.level.v4f32.f32(i64 %tex,float %x, float %y,
                                                    float %z, float %level)

%int4 @llvm.nvvm.tex.unified.cube.v4s32.f32(i64 %tex, float %x, float %y,
                                            float %z)
%int4 @llvm.nvvm.tex.unified.cube.level.v4s32.f32(i64 %tex, float %x, float %y,
                                                  float %z, float %level)

%int4 @llvm.nvvm.tex.unified.cube.v4u32.f32(i64 %tex, float %x, float %y,
                                            float %z)
%int4 @llvm.nvvm.tex.unified.cube.level.v4u32.f32(i64 %tex, float %x, float %y,
                                                  float %z, float %level)
    </pre></div>
                           <p class="p">Sampling a cube texture array:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tex.unified.cube.array.v4f32.f32(i64 %tex, i32 %idx,
                                                    float %x, float %y,
                                                    float %z)
%float4 @llvm.nvvm.tex.unified.cube.array.level.v4f32.f32(i64 %tex, i32 %idx,
                                                          float %x, float %y,
                                                          float %z,
                                                          float %level)

%int4 @llvm.nvvm.tex.unified.cube.array.v4s32.f32(i64 %tex, i32 %idx, float %x,
                                                  float %y, float %z)
%int4 @llvm.nvvm.tex.unified.cube.array.level.v4s32.f32(i64 %tex, i32 %idx,
                                                        float %x, float %y,
                                                        float %z, float %level)

%int4 @llvm.nvvm.tex.unified.cube.array.v4u32.f32(i64 %tex, i32 %idx, float %x,
                                                  float %y, float %z)
%int4 @llvm.nvvm.tex.unified.cube.array.level.v4u32.f32(i64 %tex, i32 %idx,
                                                        float %x, float %y,
                                                        float %z, float %level)
    </pre></div>
                           <p class="p">Fetching a four-texel bilerp footprint:</p>
                           <div class="p"><pre xml:space="preserve">
%float4 @llvm.nvvm.tld4.unified.r.2d.v4f32.f32(i64 %tex, float %x, float %y)
%float4 @llvm.nvvm.tld4.unified.g.2d.v4f32.f32(i64 %tex, float %x, float %y)
%float4 @llvm.nvvm.tld4.unified.b.2d.v4f32.f32(i64 %tex, float %x, float %y)
%float4 @llvm.nvvm.tld4.unified.a.2d.v4f32.f32(i64 %tex, float %x, float %y)

%int4 @llvm.nvvm.tld4.unified.r.2d.v4s32.f32(i64 %tex, float %x, float %y)
%int4 @llvm.nvvm.tld4.unified.g.2d.v4s32.f32(i64 %tex, float %x, float %y)
%int4 @llvm.nvvm.tld4.unified.b.2d.v4s32.f32(i64 %tex, float %x, float %y)
%int4 @llvm.nvvm.tld4.unified.a.2d.v4s32.f32(i64 %tex, float %x, float %y)

%int4 @llvm.nvvm.tld4.unified.r.2d.v4u32.f32(i64 %tex, float %x, float %y)
%int4 @llvm.nvvm.tld4.unified.g.2d.v4u32.f32(i64 %tex, float %x, float %y)
%int4 @llvm.nvvm.tld4.unified.b.2d.v4u32.f32(i64 %tex, float %x, float %y)
%int4 @llvm.nvvm.tld4.unified.a.2d.v4u32.f32(i64 %tex, float %x, float %y)
    </pre></div>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="nvvm-intrin-surface-load"><a name="nvvm-intrin-surface-load" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-surface-load" name="nvvm-intrin-surface-load" shape="rect">13.5.2.&nbsp;Surface Loads</a></h3>
                        <div class="body conbody">
                           <p class="p">In the following intrinsics, <samp class="ph codeph">&lt;clamp&gt;</samp> represents the surface clamp mode
                              and can be one of the following: <samp class="ph codeph">clamp</samp>, <samp class="ph codeph">trap</samp>, or
                              <samp class="ph codeph">zero</samp>.
                           </p>
                           <p class="p">For surface load instructions that operate on 8-bit data channels, the output operands are
                              of type <samp class="ph codeph">i16</samp>.  The high-order eight bits are undefined.
                           </p>
                           <p class="p">Reading a 1D surface:</p>
                           <div class="p"><pre xml:space="preserve">
i16 @llvm.nvvm.suld.1d.i8.&lt;clamp&gt;(i64 %tex, i32 %x)
i16 @llvm.nvvm.suld.1d.i16.&lt;clamp&gt;(i64 %tex, i32 %x)
i32 @llvm.nvvm.suld.1d.i32.&lt;clamp&gt;(i64 %tex, i32 %x)
i64 @llvm.nvvm.suld.1d.i64.&lt;clamp&gt;(i64 %tex, i32 %x)

%short2 @llvm.nvvm.suld.1d.v2i8.&lt;clamp&gt;(i64 %tex, i32 %x)
%short2 @llvm.nvvm.suld.1d.v2i16.&lt;clamp&gt;(i64 %tex, i32 %x)
%int2 @llvm.nvvm.suld.1d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x)
%long2 @llvm.nvvm.suld.1d.v2i64.&lt;clamp&gt;(i64 %tex, i32 %x)

%short4 @llvm.nvvm.suld.1d.v4i8.&lt;clamp&gt;(i64 %tex, i32 %x)
%short4 @llvm.nvvm.suld.1d.v4i16.&lt;clamp&gt;(i64 %tex, i32 %x)
%int4 @llvm.nvvm.suld.1d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x)
    </pre></div>
                           <p class="p">Reading a 1D surface array:</p>
                           <div class="p"><pre xml:space="preserve">
i16 @llvm.nvvm.suld.1d.array.i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
i16 @llvm.nvvm.suld.1d.array.i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
i32 @llvm.nvvm.suld.1d.array.i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
i64 @llvm.nvvm.suld.1d.array.i64.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)

%short2 @llvm.nvvm.suld.1d.array.v2i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
%short2 @llvm.nvvm.suld.1d.array.v2i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
%int2 @llvm.nvvm.suld.1d.array.v2i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
%long2 @llvm.nvvm.suld.1d.array.v2i64.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)

%short4 @llvm.nvvm.suld.1d.array.v4i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
%short4 @llvm.nvvm.suld.1d.array.v4i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
%int4 @llvm.nvvm.suld.1d.array.v4i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x)
    </pre></div>
                           <p class="p">Reading a 2D surface:</p>
                           <div class="p"><pre xml:space="preserve">
i16 @llvm.nvvm.suld.2d.i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
i16 @llvm.nvvm.suld.2d.i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
i32 @llvm.nvvm.suld.2d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
i64 @llvm.nvvm.suld.2d.i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)

%short2 @llvm.nvvm.suld.2d.v2i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
%short2 @llvm.nvvm.suld.2d.v2i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
%int2 @llvm.nvvm.suld.2d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
%long2 @llvm.nvvm.suld.2d.v2i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)

%short4 @llvm.nvvm.suld.2d.v4i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
%short4 @llvm.nvvm.suld.2d.v4i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
%int4 @llvm.nvvm.suld.2d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y)
    </pre></div>
                           <p class="p">Reading a 2D surface array:</p>
                           <div class="p"><pre xml:space="preserve">
i16 @llvm.nvvm.suld.2d.array.i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x, i32 %y)
i16 @llvm.nvvm.suld.2d.array.i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x, i32 %y)
i32 @llvm.nvvm.suld.2d.array.i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x, i32 %y)
i64 @llvm.nvvm.suld.2d.array.i64.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x, i32 %y)

%short2 @llvm.nvvm.suld.2d.array.v2i8.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y)
%short2 @llvm.nvvm.suld.2d.array.v2i16.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                               i32 %x, i32 %y)
%int2 @llvm.nvvm.suld.2d.array.v2i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                             i32 %x, i32 %y)
%long2 @llvm.nvvm.suld.2d.array.v2i64.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y)

%short4 @llvm.nvvm.suld.2d.array.v4i8.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y)
%short4 @llvm.nvvm.suld.2d.array.v4i16.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                               i32 %x, i32 %y)
%int4 @llvm.nvvm.suld.2d.array.v4i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                             i32 %x, i32 %y)
    </pre></div>
                           <p class="p">Reading a 3D surface:</p>
                           <div class="p"><pre xml:space="preserve">
i16 @llvm.nvvm.suld.3d.i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)
i16 @llvm.nvvm.suld.3d.i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)
i32 @llvm.nvvm.suld.3d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)
i64 @llvm.nvvm.suld.3d.i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)

%short2 @llvm.nvvm.suld.3d.v2i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)
%short2 @llvm.nvvm.suld.3d.v2i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)
%int2 @llvm.nvvm.suld.3d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)
%long2 @llvm.nvvm.suld.3d.v2i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z)

%short4 @llvm.nvvm.suld.3d.v4i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i32 %z)
%short4 @llvm.nvvm.suld.3d.v4i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                         i32 %z)
%int4 @llvm.nvvm.suld.3d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                       i32 %z)
    </pre></div>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="unique_1950908514"><a name="unique_1950908514" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#unique_1950908514" name="unique_1950908514" shape="rect">Surface Stores</a></h3>
                        <div class="body conbody">
                           <p class="p">In the following intrinsics, <samp class="ph codeph">&lt;clamp&gt;</samp> represents the surface clamp mode.
                              It is <samp class="ph codeph">trap</samp> for the formatted stores, and can be one
                              of the following for unformatted stores: <samp class="ph codeph">clamp</samp>, <samp class="ph codeph">trap</samp>, or
                              <samp class="ph codeph">zero</samp>.
                           </p>
                           <p class="p">For surface store instructions that operate on 8-bit data channels, the input operands are
                              of type <samp class="ph codeph">i16</samp>.  The high-order eight bits are ignored.
                           </p>
                           <p class="p">Writing a 1D surface:</p>
                           <div class="p"><pre xml:space="preserve">
;; Unformatted
void @llvm.nvvm.sust.b.1d.i8.&lt;clamp&gt;(i64 %tex, i32 %x, i16 %r)
void @llvm.nvvm.sust.b.1d.i16.&lt;clamp&gt;(i64 %tex, i32 %x, i16 %r)
void @llvm.nvvm.sust.b.1d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %r)
void @llvm.nvvm.sust.b.1d.i64.&lt;clamp&gt;(i64 %tex, i32 %x, i64 %r)

void @llvm.nvvm.sust.b.1d.v2i8.&lt;clamp&gt;(i64 %tex, i32 %x, i16 %r, i16 %g)
void @llvm.nvvm.sust.b.1d.v2i16.&lt;clamp&gt;(i64 %tex, i32 %x, i16 %r, i16 %g)
void @llvm.nvvm.sust.b.1d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %r, i32 %g)
void @llvm.nvvm.sust.b.1d.v2i64.&lt;clamp&gt;(i64 %tex, i32 %x, i64 %r, i64 %g)

void @llvm.nvvm.sust.b.1d.v4i8.&lt;clamp&gt;(i64 %tex, i32 %x,
                                       i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.1d.v4i16.&lt;clamp&gt;(i64 %tex, i32 %x,
                                        i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.1d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x,
                                        i32 %r, i32 %g, i32 %b, i32 %a)

;; Formatted
void @llvm.nvvm.sust.p.1d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %r)

void @llvm.nvvm.sust.p.1d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %r, i32 %g)

void @llvm.nvvm.sust.p.1d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x,
                                        i32 %r, i32 %g, i32 %b, i32 %a)
    </pre></div>
                           <p class="p">Writing a 1D surface array:</p>
                           <div class="p"><pre xml:space="preserve">
;; Unformatted
void @llvm.nvvm.sust.b.1d.array.i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                                 i16 %r)
void @llvm.nvvm.sust.b.1d.array.i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                                  i16 %r)
void @llvm.nvvm.sust.b.1d.array.i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                                  i32 %r)
void @llvm.nvvm.sust.b.1d.array.i64.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                                  i64 %r)

void @llvm.nvvm.sust.b.1d.array.v2i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                             i16 %r, i16 %g)
void @llvm.nvvm.sust.b.1d.array.v2i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i16 %r, i16 %g)
void @llvm.nvvm.sust.b.1d.array.v2i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i32 %r, i32 %g)
void @llvm.nvvm.sust.b.1d.array.v2i64.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i64 %r, i64 %g)

void @llvm.nvvm.sust.b.1d.array.v4i8.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                             i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.1d.array.v4i16.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.1d.array.v4i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i32 %r, i32 %g, i32 %b, i32 %a)

;; Formatted
void @llvm.nvvm.sust.p.1d.array.i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                                  i32 %r)

void @llvm.nvvm.sust.p.1d.array.v2i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i32 %r, i32 %g)

void @llvm.nvvm.sust.p.1d.array.v4i32.&lt;clamp&gt;(i64 %tex, i32 %idx, i32 %x,
                                              i32 %r, i32 %g, i32 %b, i32 %a)
    </pre></div>
                           <p class="p">Writing a 2D surface:</p>
                           <div class="p"><pre xml:space="preserve">
;; Unformatted
void @llvm.nvvm.sust.b.2d.i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i16 %r)
void @llvm.nvvm.sust.b.2d.i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i16 %r)
void @llvm.nvvm.sust.b.2d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %r)
void @llvm.nvvm.sust.b.2d.i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i64 %r)

void @llvm.nvvm.sust.b.2d.v2i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                       i16 %r, i16 %g)
void @llvm.nvvm.sust.b.2d.v2i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i16 %r, i16 %g)
void @llvm.nvvm.sust.b.2d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i32 %r, i32 %g)
void @llvm.nvvm.sust.b.2d.v2i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i64 %r, i64 %g)

void @llvm.nvvm.sust.b.2d.v4i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                       i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.2d.v4i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.2d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i32 %r, i32 %g, i32 %b, i32 %a)

;; Formatted
void @llvm.nvvm.sust.p.2d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %r)

void @llvm.nvvm.sust.p.2d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i32 %r, i32 %g)

void @llvm.nvvm.sust.p.2d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y,
                                        i32 %r, i32 %g, i32 %b, i32 %a)
    </pre></div>
                           <p class="p">Writing a 2D surface array:</p>
                           <div class="p"><pre xml:space="preserve">
;; Unformatted
void @llvm.nvvm.sust.b.2d.array.i8.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                           i32 %x, i32 %y, i16 %r)
void @llvm.nvvm.sust.b.2d.array.i16.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                            i32 %x, i32 %y, i16 %r)
void @llvm.nvvm.sust.b.2d.array.i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                            i32 %x, i32 %y, i32 %r)
void @llvm.nvvm.sust.b.2d.array.i64.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                            i32 %x, i32 %y, i64 %r)

void @llvm.nvvm.sust.b.2d.array.v2i8.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                             i32 %x, i32 %y,
                                             i16 %r, i16 %g)
void @llvm.nvvm.sust.b.2d.array.v2i16.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i16 %r, i16 %g)
void @llvm.nvvm.sust.b.2d.array.v2i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i32 %r, i32 %g)
void @llvm.nvvm.sust.b.2d.array.v2i64.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i64 %r, i64 %g)

void @llvm.nvvm.sust.b.2d.array.v4i8.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                             i32 %x, i32 %y,
                                             i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.2d.array.v4i16.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.2d.array.v4i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i32 %r, i32 %g, i32 %b, i32 %a)

;; Formatted
void @llvm.nvvm.sust.p.2d.array.i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                            i32 %x, i32 %y, i32 %r)

void @llvm.nvvm.sust.p.2d.array.v2i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i32 %r, i32 %g)

void @llvm.nvvm.sust.p.2d.array.v4i32.&lt;clamp&gt;(i64 %tex, i32 %idx,
                                              i32 %x, i32 %y,
                                              i32 %r, i32 %g, i32 %b, i32 %a)
    </pre></div>
                           <p class="p">Writing a 3D surface:</p>
                           <div class="p"><pre xml:space="preserve">
;; Unformatted
void @llvm.nvvm.sust.b.3d.i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z, i16 %r)
void @llvm.nvvm.sust.b.3d.i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z, i16 %r)
void @llvm.nvvm.sust.b.3d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z, i32 %r)
void @llvm.nvvm.sust.b.3d.i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z, i64 %r)

void @llvm.nvvm.sust.b.3d.v2i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                       i16 %r, i16 %g)
void @llvm.nvvm.sust.b.3d.v2i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i16 %r, i16 %g)
void @llvm.nvvm.sust.b.3d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i32 %r, i32 %g)
void @llvm.nvvm.sust.b.3d.v2i64.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i64 %r, i64 %g)

void @llvm.nvvm.sust.b.3d.v4i8.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                       i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.3d.v4i16.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i16 %r, i16 %g, i16 %b, i16 %a)
void @llvm.nvvm.sust.b.3d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i32 %r, i32 %g, i32 %b, i32 %a)

;; Formatted
void @llvm.nvvm.sust.p.3d.i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z, i32 %r)

void @llvm.nvvm.sust.p.3d.v2i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i32 %r, i32 %g)

void @llvm.nvvm.sust.p.3d.v4i32.&lt;clamp&gt;(i64 %tex, i32 %x, i32 %y, i32 %z,
                                        i32 %r, i32 %g, i32 %b, i32 %a)
    </pre></div>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvvm-intrin-warp-level"><a name="nvvm-intrin-warp-level" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level" name="nvvm-intrin-warp-level" shape="rect">13.6.&nbsp;Warp-level Operations</a></h3>
                     <div class="body conbody"></div>
                     <div class="topic concept nested2" id="nvvm-intrin-warp-level-sync"><a name="nvvm-intrin-warp-level-sync" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-sync" name="nvvm-intrin-warp-level-sync" shape="rect">13.6.1.&nbsp;Barrier Synchronization</a></h3>
                        <div class="body conbody">
                           <p class="p">The following intrinsic performs a barrier synchronization among a subset of threads in a warp.</p>
                           <div class="p"><pre xml:space="preserve">
declare void @llvm.nvvm.bar.warp.sync(i32 %membermask)
</pre></div>
                           <p class="p">This intrinsic causes executing thread to wait until all threads corresponding
                              to <samp class="ph codeph">%membermask</samp> have executed the same intrinsic with the same 
                              <samp class="ph codeph">%membermask</samp> value before resuming execution.
                           </p>
                           <p class="p">The argument <samp class="ph codeph">%membership</samp> is a 32bit mask, with each bit corresponding
                              to a lane in the warp. 1 means the thread is in the subset.
                           </p>
                           <p class="p">The behavior of this intrinsic is undefined if any thread participating in
                              the intrinsic has exited or the executing thread is not in the <samp class="ph codeph">%membermask</samp>.
                           </p>
                           <p class="p">For <samp class="ph codeph">compute_62</samp> or below, all threads in <samp class="ph codeph">%membermask</samp> must call the same 
                              <samp class="ph codeph">@llvm.nvvm.bar.warp.sync()</samp> in convergence, and only threads belonging to the 
                              <samp class="ph codeph">%membermask</samp> can be active when the intrinsic is called. Otherwise, the behavior is undefined.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="nvvm-intrin-warp-level-datamove"><a name="nvvm-intrin-warp-level-datamove" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-datamove" name="nvvm-intrin-warp-level-datamove" shape="rect">13.6.2.&nbsp;Data Movement</a></h3>
                        <div class="body conbody">
                           <p class="p">The following intrinsic synchronizes a subset of threads in a warp and then 
                              performs data movement among these threads.
                           </p>
                           <div class="p"><pre xml:space="preserve">
declare {i32, i1} @llvm.nvvm.shfl.sync.i32(i32 %membermask, i32 %mode, i32 %a, i32 %b, i32 %c)
</pre></div>
                           <p class="p">This intrinsic causes executing thread to wait until all threads corresponding
                              to <samp class="ph codeph">%membermask</samp> have executed the same intrinsic with the same 
                              <samp class="ph codeph">%membermask</samp> value before reading data from other threads in the same warp.
                           </p>
                           <p class="p">The argument <samp class="ph codeph">%membership</samp> is a 32bit mask, with each bit corresponding
                              to a lane in the warp. 1 means the thread is in the subset.
                           </p>
                           <p class="p">Each thread in the currently executing warp will compute a source lane index j based
                              on input arguments <samp class="ph codeph">%b</samp>, <samp class="ph codeph">%c</samp>, and <samp class="ph codeph">%mode</samp>. 
                              If the computed source lane index j is in range,
                              the returned <samp class="ph codeph">i32</samp> value will be the value of <samp class="ph codeph">%a</samp> from 
                              lane j; otherwise, it will be the the value of <samp class="ph codeph">%a</samp> from the current thread. 
                              If the thread corresponding to lane j is inactive, then the returned <samp class="ph codeph">i32</samp> value 
                              is undefined. The returned <samp class="ph codeph">i1</samp> value is set to 1 if the source lane 
                              j is in range, and otherwise set to 0.
                           </p>
                           <p class="p">The argument <samp class="ph codeph">%mode</samp> must be a constant and its encoding is specified
                              in the following table. 
                           </p>
                           <div class="p">
                              <div class="tablenoborder">
                                 <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                                    <thead class="thead" align="left">
                                       <tr class="row">
                                          <th class="entry" valign="top" id="d54e3902" rowspan="1" colspan="1">Encoding</th>
                                          <th class="entry" valign="top" id="d54e3905" rowspan="1" colspan="1">Meaning</th>
                                       </tr>
                                    </thead>
                                    <tbody class="tbody">
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">0</td>
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">IDX</td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">1</td>
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">UP</td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">2</td>
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">DOWN</td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">3</td>
                                          <td class="entry" valign="top" headers="d54e3902 d54e3905" rowspan="1" colspan="1">BFLY</td>
                                       </tr>
                                    </tbody>
                                 </table>
                              </div>
                           </div>
                           <p class="p">Argument <samp class="ph codeph">%b</samp> specifies a source lane or source lane offset, depending on 
                              <samp class="ph codeph">%mode</samp>.
                           </p>
                           <p class="p">Argument <samp class="ph codeph">%c</samp> contains two packed values specifying a mask for logically splitting warps
                              into sub-segments and an upper bound for clamping the source lane index.
                           </p>
                           <p class="p">The following pseudo code illustrates the semantics of this intrinsic. </p>
                           <div class="p"><pre xml:space="preserve">
wait until all threads in %membermask have arrived;

%lane[4:0] = current_lane_id; // position of thread in warp
%bval[4:0] = %b[4:0]; // source lane or lane offset (0..31)
%cval[4:0] = %c[4:0]; // clamp value
%mask[4:0] = %c[12:8];

%maxLane = (%lane[4:0] &amp; %mask[4:0]) | (%cval[4:0] &amp; ~%mask[4:0]);
%minLane = (%lane[4:0] &amp; %mask[4:0]);
switch (%mode) {
case UP: %j = %lane - %bval; %pval = (%j &gt;= %maxLane); break;
case DOWN: %j = %lane + %bval; %pval = (%j &lt;= %maxLane); break;
case BFLY: %j = %lane ^ %bval; %pval = (%j &lt;= %maxLane); break;
case IDX: %j = %minLane | (%bval[4:0] &amp; ~%mask[4:0]); %pval = (%j &lt;= %maxLane); break;
}
if (!%pval) %j = %lane; // copy from own lane
if (thread at lane %j is active)
   %d = %a from lane %j
else
   %d = undef
return {%d, %pval}
</pre></div>
                           <p class="p">Note that the return values are undefined if the thread at the source lane is not in
                              <samp class="ph codeph">%membermask</samp>.
                           </p>
                           <p class="p">The behavior of this intrinsic is undefined if any thread participating in
                              the intrinsic has exited or the executing thread is not in the <samp class="ph codeph">%membermask</samp>.
                           </p>
                           <p class="p">For <samp class="ph codeph">compute_62</samp> or below, all threads in <samp class="ph codeph">%membermask</samp> must call the same 
                              <samp class="ph codeph">@llvm.nvvm.shfl.sync.i32()</samp> in convergence, and only threads belonging to the 
                              <samp class="ph codeph">%membermask</samp> can be active when the intrinsic is called. Otherwise, the behavior is undefined.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="nvvm-intrin-warp-level-vote"><a name="nvvm-intrin-warp-level-vote" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-vote" name="nvvm-intrin-warp-level-vote" shape="rect">13.6.3.&nbsp;Vote</a></h3>
                        <div class="body conbody">
                           <p class="p">The following intrinsic synchronizes a subset of threads in a warp and then 
                              performs a reduce-and-broadcast of a predicate over all threads in the subset.
                           </p>
                           <div class="p"><pre xml:space="preserve">
declare {i32, i1} @llvm.nvvm.vote.sync(i32 %membermask, i32 %mode, i1 %predicate)
</pre></div>
                           <p class="p">This intrinsic causes executing thread to wait until all threads corresponding
                              to <samp class="ph codeph">%membermask</samp> have executed the same intrinsic with the same 
                              <samp class="ph codeph">%membermask</samp> value before
                              performing a reduce-and-broadcast of a predicate over all threads in the subset.
                           </p>
                           <p class="p">The argument <samp class="ph codeph">%membermask</samp> is a 32-bit mask, with each bit corresponding
                              to a lane in the warp. 1 means the thread is in the subset.
                           </p>
                           <p class="p"><samp class="ph codeph">@llvm.nvvm.vote.sync()</samp> performs a reduction of the source 
                              <samp class="ph codeph">%predicate</samp> across 
                              all threads in <samp class="ph codeph">%membermask</samp> after the synchronization. The return value is
                              the same across all threads in the <samp class="ph codeph">%membermask</samp>. The element in the
                              returned aggregate that holds the return value depends on <samp class="ph codeph">%mode</samp>. 
                           </p>
                           <p class="p">The argument <samp class="ph codeph">%mode</samp> must be a constant and its encoding is specified
                              in the following table. 
                           </p>
                           <div class="p">
                              <div class="tablenoborder">
                                 <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                                    <thead class="thead" align="left">
                                       <tr class="row">
                                          <th class="entry" valign="top" id="d54e4054" rowspan="1" colspan="1">Encoding</th>
                                          <th class="entry" valign="top" id="d54e4057" rowspan="1" colspan="1">Meaning</th>
                                          <th class="entry" valign="top" id="d54e4059" rowspan="1" colspan="1">return value</th>
                                       </tr>
                                    </thead>
                                    <tbody class="tbody">
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">0</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">ALL</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1"><samp class="ph codeph">i1:</samp>1 if the source predicates is 1 for all thread in <samp class="ph codeph">%membermask</samp>, 0 otherwise
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">1</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">ANY</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1"><samp class="ph codeph">i1:</samp>1 if the source predicate is 1 for any thread in <samp class="ph codeph">%membermask</samp>, 0 otherwise
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">2</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">EQ</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1"><samp class="ph codeph">i1:</samp>1 if the source predicates are the same for all thread in <samp class="ph codeph">%membermask</samp>, 0 otherwise
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">3</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1">BALLOT</td>
                                          <td class="entry" valign="top" headers="d54e4054 d54e4057 d54e4059" rowspan="1" colspan="1"><samp class="ph codeph">i32:</samp>ballot data, containing the <samp class="ph codeph">%predicate</samp> value from each thread in <samp class="ph codeph">%membermask</samp></td>
                                       </tr>
                                    </tbody>
                                 </table>
                              </div>
                           </div>
                           <p class="p">
                              For the <samp class="ph codeph">BALLOT</samp> mode, the <samp class="ph codeph">i32</samp> value represents the ballot data, which contains the <samp class="ph codeph">%predicate</samp> value
                              from each thread in <samp class="ph codeph">%membermask</samp> in the bit position corresponding to the thread's
                              land id. The bit value corresponding to a thread not in <samp class="ph codeph">%membermask</samp> is 0.
                           </p>
                           <p class="p">Note that the return values are undefined if the thread at the source lane is not in
                              <samp class="ph codeph">%membermask</samp>.
                           </p>
                           <p class="p">The behavior of this intrinsic is undefined if any thread participating in
                              the intrinsic has exited or the executing thread is not in the <samp class="ph codeph">%membermask</samp>.
                           </p>
                           <p class="p">For <samp class="ph codeph">compute_62</samp> or below, all threads in <samp class="ph codeph">%membermask</samp> must call the same 
                              <samp class="ph codeph">@llvm.nvvm.vote.sync()</samp> in convergence, and only threads belonging to the 
                              <samp class="ph codeph">%membermask</samp> can be active when the intrinsic is called. Otherwise, the behavior is undefined.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="unique_389030136"><a name="unique_389030136" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#unique_389030136" name="unique_389030136" shape="rect">Match</a></h3>
                        <div class="body conbody">
                           <p class="p">The following intrinsics synchronize a subset of threads in a warp and then 
                              broadcast and compare a value across threads in the subset.
                           </p>
                           <div class="p"><pre xml:space="preserve">
declare i32 @llvm.nvvm.match.any.sync.i32(i32 %membermask, i32 %value)
declare i32 @llvm.nvvm.match.any.sync.i64(i32 %membermask, i64 %value)
declare {i32, i1} @llvm.nvvm.match.all.sync.i32(i32 %membermask, i32 %value)
declare {i32, i1} @llvm.nvvm.match.all.sync.i64(i32 %membermask, i64 %value)
</pre></div>
                           <p class="p">These intrinsics cause executing thread to wait until all threads corresponding
                              to <samp class="ph codeph">%membermask</samp> have executed the same intrinsic with the same 
                              <samp class="ph codeph">%membermask</samp> value before 
                              performing broadcast and compare of operand <samp class="ph codeph">%value</samp> 
                              across all threads in the subset.
                           </p>
                           <p class="p">The argument <samp class="ph codeph">%membership</samp> is a 32bit mask, with each bit corresponding
                              to a lane in the warp. 1 means the thread is in the subset.
                           </p>
                           <p class="p">The <samp class="ph codeph">i32</samp> return value is a 32-bit mask where bit position in mask corresponds to thread’s laneid.
                           </p>
                           <p class="p">In the <samp class="ph codeph">any</samp> version, the <samp class="ph codeph">i32</samp> return value is set to the mask of active threads in 
                              <samp class="ph codeph">%membermask</samp> that have same value as operand <samp class="ph codeph">%value</samp>.
                           </p>
                           <p class="p">In the <samp class="ph codeph">all</samp> version, if all active threads in <samp class="ph codeph">%membermask</samp> have same value as 
                              operand <samp class="ph codeph">%value</samp>, the <samp class="ph codeph">i32</samp> 
                              return value is set to <samp class="ph codeph">%membermask</samp>, and the <samp class="ph codeph">i1</samp> value is set to 1. 
                              Otherwise, the <samp class="ph codeph">i32</samp> return value is set to 0 and the <samp class="ph codeph">i1</samp> return value is also set to 0.
                           </p>
                           <p class="p">The behavior of this intrinsic is undefined if any thread participating in
                              the intrinsic has exited or the executing thread is not in the <samp class="ph codeph">%membermask</samp>.
                           </p>
                           <p class="p">These intrinsics are only available on <samp class="ph codeph">compute_70</samp> or higher.
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="nvvm-intrin-warp-level-matrix"><a name="nvvm-intrin-warp-level-matrix" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-matrix" name="nvvm-intrin-warp-level-matrix" shape="rect">13.6.5.&nbsp;Matrix Operation</a></h3>
                        <div class="body conbody">
                           <p class="p">THIS IS PREVIEW FEATURE. SUPPORT MAY BE REMOVED IN FUTURE RELEASES.</p>
                           <p class="p">
                              NVVM provides warp-level intrinsics for matrix multiply operations. 
                              The core operation is a matrix multiply and accumulate of the form:
                           </p>
                           <div class="p"><pre xml:space="preserve">
    D = A*B + C, or
    C = A*B + C
</pre></div>
                           <p class="p">where <samp class="ph codeph">A</samp> is an <samp class="ph codeph">MxK</samp> matrix, <samp class="ph codeph">B</samp> is a <samp class="ph codeph">KxN</samp> matrix, while <samp class="ph codeph">C</samp> and <samp class="ph codeph">D</samp> are <samp class="ph codeph">MxN</samp> matrices.
                              <samp class="ph codeph">C</samp> and <samp class="ph codeph">D</samp> are also called accumulators. The element type of the <samp class="ph codeph">A</samp> and <samp class="ph codeph">B</samp> matrices is
                              16-bit floating point. The element type of the accumulators can be either
                              32-bit floating point or 16-bit floating point.
                           </p>
                           <p class="p">All threads in a warp will collectively hold the contents of each matrix <samp class="ph codeph">A</samp>, <samp class="ph codeph">B</samp>, <samp class="ph codeph">C</samp> 
                              and <samp class="ph codeph">D</samp>. Each thread will hold only a fragment of matrix <samp class="ph codeph">A</samp>, a fragment of matrix <samp class="ph codeph">B</samp>, 
                              a fragment of matrix <samp class="ph codeph">C</samp>, and a fragment of the result matrix <samp class="ph codeph">D</samp>. How the elements of 
                              a matrix are distributed among the fragments is opaque to the user and is different
                              for matrix <samp class="ph codeph">A</samp>, <samp class="ph codeph">B</samp> and the accumulator. 
                           </p>
                           <p class="p">A fragment is represented by a sequence of element values. For fp32
                              matrices, the element type is <samp class="ph codeph">float</samp>. For fp16 matrices, the element type is <samp class="ph codeph">i32</samp> 
                              (each <samp class="ph codeph">i32</samp> value holds two fp16 values). The number of elements varies with the 
                              shape of the matrix. 
                           </p>
                        </div>
                        <div class="topic concept nested3" id="nvvm-intrin-warp-level-matrix-ld"><a name="nvvm-intrin-warp-level-matrix-ld" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-matrix-ld" name="nvvm-intrin-warp-level-matrix-ld" shape="rect">13.6.5.1.&nbsp;Load Fragments</a></h3>
                           <div class="body conbody">
                              <p class="p">The following intrinsics synchronize all threads in a warp and then 
                                 load a fragment of a matrix for each thread.
                              </p>
                              <div class="p"><pre xml:space="preserve">
; load fragment A
declare {i32, i32, i32, i32, i32, i32, i32, i32} @llvm.nvvm.hmma.m16n16k16.ld.a.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {i32, i32, i32, i32, i32, i32, i32, i32} @llvm.nvvm.hmma.m32n8k16.ld.a.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {i32, i32, i32, i32, i32, i32, i32, i32} @llvm.nvvm.hmma.m8n32k16.ld.a.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);

; load fragment B
declare {i32, i32, i32, i32, i32, i32, i32, i32} @llvm.nvvm.hmma.m16n16k16.ld.b.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {i32, i32, i32, i32, i32, i32, i32, i32} @llvm.nvvm.hmma.m32n8k16.ld.b.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {i32, i32, i32, i32, i32, i32, i32, i32} @llvm.nvvm.hmma.m8n32k16.ld.b.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);

; load fragment C
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m16n16k16.ld.c.f32.p&lt;n&gt;f32(float addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m32n8k16.ld.c.f32.p&lt;n&gt;f32(float addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m8n32k16.ld.c.f32.p&lt;n&gt;f32(float addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);

; load fragment C
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m16n16k16.ld.c.f16.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m32n8k16.ld.c.f16.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m8n32k16.ld.c.f16.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol);
</pre></div>
                              <p class="p">These intrinsics load and return a matrix fragment from memory at location <samp class="ph codeph">%ptr</samp>. The matrix in memory must 
                                 be in a canonical matrix layout with leading dimension <samp class="ph codeph">%ldm</samp>. <samp class="ph codeph">%rowcol</samp> specifies which the 
                                 matrix in memory is row-major (0) or column-major (1). <samp class="ph codeph">%rowcol</samp> must be a constant value.
                              </p>
                              <p class="p">The returned sequence of values represent the fragment held by the calling thread. 
                                 How the elements of a matrix are distributed among the fragments is opaque to the user and is different
                                 for matrix <samp class="ph codeph">A</samp>, <samp class="ph codeph">B</samp> and the accumulator. Therefore, three variants (i.e. 
                                 <samp class="ph codeph">ld.a</samp>, <samp class="ph codeph">ld.b</samp>, and <samp class="ph codeph">ld.c</samp>) are provided. 
                              </p>
                              <p class="p">These intrinsics are overloaded based on the address spaces. The address space number <samp class="ph codeph">&lt;n&gt;</samp> must be 
                                 either 0 (generic), 1 (global) or 3 (shared).
                              </p>
                              <p class="p">The behavior of this intrinsic is undefined if any thread in the warp has exited.</p>
                              <p class="p">These intrinsics are only available on <samp class="ph codeph">compute_70</samp> or higher.
                              </p>
                           </div>
                        </div>
                        <div class="topic concept nested3" id="nvvm-intrin-warp-level-matrix-st"><a name="nvvm-intrin-warp-level-matrix-st" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-matrix-st" name="nvvm-intrin-warp-level-matrix-st" shape="rect">13.6.5.2.&nbsp;Store Fragments</a></h3>
                           <div class="body conbody">
                              <p class="p">The following intrinsics synchronize all threads in a warp and then 
                                 store a fragment of a matrix for each thread.
                              </p>
                              <div class="p"><pre xml:space="preserve">
; The last 8 arguments are the elements of the C fragment  
declare void @llvm.nvvm.hmma.m16n16k16.st.c.f32.p&lt;n&gt;float(float addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol, float, float, float, float, float, float, float, float);
declare void @llvm.nvvm.hmma.m32n8k16.st.c.f32.p&lt;n&gt;float(float addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol, float, float, float, float, float, float, float, float);
declare void @llvm.nvvm.hmma.m8n32k16.st.c.f32.p&lt;n&gt;float(float addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol, float, float, float, float, float, float, float, float);

; The last 4 arguments are the elements of the C fragment  
declare void @llvm.nvvm.hmma.m16n16k16.st.c.f16.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol, i32, i32, i32, i32);
declare void @llvm.nvvm.hmma.m32n8k16.st.c.f16.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol, i32, i32, i32, i32);
declare void @llvm.nvvm.hmma.m8n32k16.st.c.f16.p&lt;n&gt;i32(i32 addrspace(&lt;n&gt;)* %ptr, i32 %ldm, i32 %rowcol, i32, i32, i32, i32);
</pre></div>
                              <p class="p">These intrinsics store an accumulator fragment to memory at location <samp class="ph codeph">%ptr</samp>. The matrix in memory must 
                                 be in a canonical matrix layout with leading dimension <samp class="ph codeph">%ldm</samp>. <samp class="ph codeph">%rowcol</samp> specifies which the 
                                 matrix in memory is row-major (0) or column-major (1). <samp class="ph codeph">%rowcol</samp> must be a constant value.
                              </p>
                              <p class="p">These intrinsics are overloaded based on the address spaces. The address space number <samp class="ph codeph">&lt;n&gt;</samp> must be 
                                 either 0 (generic), 1 (global) or 3 (shared).
                              </p>
                              <p class="p">The behavior of this intrinsic is undefined if any thread in the warp has exited.</p>
                              <p class="p">These intrinsics are only available on <samp class="ph codeph">compute_70</samp> or higher.
                              </p>
                           </div>
                        </div>
                        <div class="topic concept nested3" id="nvvm-intrin-warp-level-matrix-mma"><a name="nvvm-intrin-warp-level-matrix-mma" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#nvvm-intrin-warp-level-matrix-mma" name="nvvm-intrin-warp-level-matrix-mma" shape="rect">13.6.5.3.&nbsp;Matrix Multiply-and-Accumulate</a></h3>
                           <div class="body conbody">
                              <p class="p">The following intrinsics synchronize all threads in a warp and then 
                                 perform a matrix multiply-and-accumulate operation.
                              </p>
                              <div class="p"><pre xml:space="preserve">
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m16n16k16.mma.f16.f16(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, i32 %c0, i32 %c1, i32 %c2, i32 %c3);
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m32n8k16.mma.f16.f16(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, i32 %c0, i32 %c1, i32 %c2, i32 %c3);
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m8n32k16.mma.f16.f16(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, i32 %c0, i32 %c1, i32 %c2, i32 %c3);

declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m16n16k16.mma.f32.f16(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, i32 %c0, i32 %c1, i32 %c2, i32 %c3);
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m32n8k16.mma.f32.f16(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, i32 %c0, i32 %c1, i32 %c2, i32 %c3);
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m8n32k16.mma.f32.f16(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, i32 %c0, i32 %c1, i32 %c2, i32 %c3);

declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m16n16k16.mma.f32.f32(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, float %c0, float %c1, float %c2, float %c3, float %c4, float %c5, float %c6, float %c7);
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m32n8k16.mma.f32.f32(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, float %c0, float %c1, float %c2, float %c3, float %c4, float %c5, float %c6, float %c7);
declare {float, float, float, float, float, float, float, float} @llvm.nvvm.hmma.m8n32k16.mma.f32.f32(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, float %c0, float %c1, float %c2, float %c3, float %c4, float %c5, float %c6, float %c7);

declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m16n16k16.mma.f16.f32(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, float %c0, float %c1, float %c2, float %c3, float %c4, float %c5, float %c6, float %c7);
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m32n8k16.mma.f16.f32(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, float %c0, float %c1, float %c2, float %c3, float %c4, float %c5, float %c6, float %c7);
declare {i32, i32, i32, i32} @llvm.nvvm.hmma.m8n32k16.mma.f16.f32(i32 %rowcol, i32 %satf, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %b0, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6, i32 %b7, float %c0, float %c1, float %c2, float %c3, float %c4, float %c5, float %c6, float %c7);
</pre></div>
                              <p class="p">These intrinsics perform a matrix multiply-and-accumulate operation. <samp class="ph codeph">%rowcol</samp> 
                                 specifies the layout of <samp class="ph codeph">A</samp> and <samp class="ph codeph">B</samp> fragments. It must be a constant value, which
                                 can have the following values and semantics.
                              </p>
                              <div class="p">
                                 <div class="tablenoborder">
                                    <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                                       <thead class="thead" align="left">
                                          <tr class="row">
                                             <th class="entry" valign="top" id="d54e4518" rowspan="1" colspan="1">Encoding</th>
                                             <th class="entry" valign="top" id="d54e4521" rowspan="1" colspan="1">Meaning</th>
                                          </tr>
                                       </thead>
                                       <tbody class="tbody">
                                          <tr class="row">
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">0</td>
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">A fragment is row-major, B fragment is row-major</td>
                                          </tr>
                                          <tr class="row">
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">1</td>
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">A fragment is row-major, B fragment is column-major</td>
                                          </tr>
                                          <tr class="row">
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">2</td>
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">A fragment is column-major, B fragment is row-major</td>
                                          </tr>
                                          <tr class="row">
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">3</td>
                                             <td class="entry" valign="top" headers="d54e4518 d54e4521" rowspan="1" colspan="1">A fragment is column-major, B fragment is column-major</td>
                                          </tr>
                                       </tbody>
                                    </table>
                                 </div>
                              </div>
                              <p class="p"><samp class="ph codeph">%satf</samp> must be a constant value of 0 or 1, which specifies whether the 
                                 following additional numerical properties should be applied on the destination accumulator,
                              </p>
                              <ul class="ul">
                                 <li class="li"> If an element result is +Infinity, the corresponding accumulator will contain +MAX_NORM</li>
                                 <li class="li"> If an element result is -Infinity, the corresponding accumulator will contain -MAX_NORM</li>
                                 <li class="li"> If an element result is NaN, the corresponding accumulator will contain +0</li>
                              </ul>
                              <p class="p">Support for <samp class="ph codeph">%satf</samp> value of 1 is deprecated and will be
                                 removed in a future release.
                              </p>
                              <p class="p">The behavior of these intrinsics are undefined if any thread in the warp has exited.</p>
                              <p class="p">These intrinsics are only available on <samp class="ph codeph">compute_70</samp> or higher.
                              </p>
                           </div>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="source-level-debugging-support"><a name="source-level-debugging-support" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#source-level-debugging-support" name="source-level-debugging-support" shape="rect">14.&nbsp;Source Level Debugging Support</a></h2>
                  <div class="body conbody">
                     <p class="p"> To enable source level debugging of an IR module, NVVM IR supports
                        debug intrinsics and debug information descriptors to express the
                        debugging information. Debug information descriptors are
                        represented using metadata. The current NVVM IR debug metadata
                        version is 2.0.
                        
                     </p>
                     <p class="p">The current NVVM IR debugging support is based on that in LLVM 3.4. For the complete semantics of the IR, readers
                        of this chapter should check the official Source Level Debugging with LLVM Manual (<a class="xref" href="http://llvm.org/releases/3.4/docs/SourceLevelDebugging.html" target="_blank" shape="rect">http://llvm.org/releases/3.4/docs/SourceLevelDebugging.html</a>). Additionally, LLVM 5.0 bitcode format is not supported
                        for source level debugging.
                     </p>
                     <div class="p"> The following metadata nodes need to be present in the module
                        when debugging support is requested:
                        
                        <ul class="ul">
                           <li class="li"> Named metadata node <samp class="ph codeph">!llvm.dbg.cu</samp></li>
                           <li class="li"> Module flags metadata for <samp class="ph codeph">"Debug Info Version"</samp> flag: The <em class="ph i">behavior</em> flag should be <samp class="ph codeph">Error</samp>. The value of
                              the flag should be <samp class="ph codeph">DEBUG_METADATA_VERSION</samp>
                              in LLVM 3.4, which is 1.
                              
                           </li>
                        </ul>
                     </div>
                     <p class="p"> Source level debugging is supported only for a single debug compile
                        unit. If there are multiple input NVVM IR modules, at most one
                        module may have a single debug compile unit.
                        
                     </p>
                     <div class="p">For the following debug information descriptors, NVVM IR supports
                        a modified form of the descriptors:
                        
                        <ul class="ul">
                           <li class="li"><samp class="ph codeph">DW_TAG_pointer_type</samp><pre xml:space="preserve">
!5 = metadata !{
  i32,      ;; Tag = DW_TAG_pointer_type
  metadata, ;; Source directory (including trailing slash) and
            ;; file pair (may be null)
  metadata, ;; Reference to context
  metadata, ;; Name (may be "" for anonymous types)
  i32,      ;; Line number where defined (may be 0)
  i64,      ;; Size in bits
  i64,      ;; Alignment in bits
  i64,      ;; Offset in bits
  i32,      ;; Flags to encode attributes, e.g. private
  metadata, ;; Reference to type derived from
  i32       ;; (optional) numeric value of address space for memory
            ;; pointed to
}
        </pre></li>
                           <li class="li"><samp class="ph codeph">DW_TAG_subrange_type</samp><pre xml:space="preserve">
!42 = metadata !{
  i32,      ;; Tag = DW_TAG_subrange_type
  i64,      ;; Low value
  i64,      ;; Count of number of elements
  metadata, ;; (optional) reference to variable holding low value.
            ;; If present, takes precedence over i64 constant
  metadata  ;; (optional) reference to variable holding count.
            ;; If present, takes precedence over i64 constant
}
        </pre></li>
                        </ul>
                     </div>
                     <div class="p"> NVVM IR supports the following additional debug information
                        descriptor for <samp class="ph codeph">DW_TAG_module</samp>:
                        <pre xml:space="preserve">
!45 = metadata !{
  i32,      ;; Tag = DW_TAG_module
  metadata, ;; Source directory (including trailing slash) and
            ;; file pair (may be null)
  metadata, ;; Reference to context descriptor
  metadata, ;; Name
  i32       ;; Line number
}
        </pre></div>
                  </div>
               </div>
               <div class="topic concept nested0" id="nvvm-abi-for-ptx"><a name="nvvm-abi-for-ptx" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#nvvm-abi-for-ptx" name="nvvm-abi-for-ptx" shape="rect">15.&nbsp;NVVM ABI for PTX</a></h2>
                  <div class="topic concept nested1" id="linkage-types-nvvm"><a name="linkage-types-nvvm" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#linkage-types-nvvm" name="linkage-types-nvvm" shape="rect">15.1.&nbsp;Linkage Types</a></h3>
                     <div class="body conbody">
                        <p class="p">The following table provides the mapping of NVVM IR linkage types associated with
                           functions and global variables to PTX linker directives .
                        </p>
                        <div class="tablenoborder"><a name="linkage-types-nvvm__table_ech_f53_n3" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="linkage-types-nvvm__table_ech_f53_n3" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" colspan="2" valign="top" id="d54e4702" rowspan="1">LLVM Linkage Type</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e4705" rowspan="1" colspan="1">PTX Linker Directive</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" colspan="2" valign="top" headers="d54e4702" rowspan="1"><samp class="ph codeph">private</samp>, 
                                       			                                <samp class="ph codeph">internal</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4705" rowspan="1" colspan="1">This is the default linkage type and does not require a linker
                                       directive.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" rowspan="4" valign="top" width="33.33333333333333%" headers="d54e4702" colspan="1"><samp class="ph codeph">external</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4702" rowspan="1" colspan="1">function with definition</td>
                                    <td class="entry" rowspan="2" valign="top" width="33.33333333333333%" headers="d54e4705" colspan="1"><samp class="ph codeph">.visible</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4702" rowspan="1" colspan="1">global variable with initialization</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4702" rowspan="1" colspan="1">function without definition</td>
                                    <td class="entry" rowspan="2" valign="top" width="33.33333333333333%" headers="d54e4705" colspan="1"><samp class="ph codeph">.extern</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4702" rowspan="1" colspan="1">global variable without initialization</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" colspan="2" valign="top" headers="d54e4702" rowspan="1"><samp class="ph codeph">common</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4705" rowspan="1" colspan="1"><samp class="ph codeph">.common</samp> for the global address space, otherwise <samp class="ph codeph">.weak</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" colspan="2" valign="top" headers="d54e4702" rowspan="1"><samp class="ph codeph">available_externally</samp>, 
                                       			       <samp class="ph codeph">linkonce</samp>, 
                                       			       <samp class="ph codeph">linkonce_odr</samp>,
                                       			       <samp class="ph codeph">weak</samp>,
                                       <samp class="ph codeph">weak_odr</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4705" rowspan="1" colspan="1"><samp class="ph codeph">.weak</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" colspan="2" valign="top" headers="d54e4702" rowspan="1">all other linkage types</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4705" rowspan="1" colspan="1">Not supported.</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="argument-passing-and-return"><a name="argument-passing-and-return" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#argument-passing-and-return" name="argument-passing-and-return" shape="rect">15.2.&nbsp;Parameter Passing and Return</a></h3>
                     <div class="body conbody">
                        <p class="p">The following table shows the mapping of function argument and return types in NVVM IR to
                           PTX types.
                        </p>
                        <div class="tablenoborder"><a name="argument-passing-and-return__table_ech_f53_n3" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="argument-passing-and-return__table_ech_f53_n3" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e4839" rowspan="1" colspan="1">Source Type</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e4842" rowspan="1" colspan="1">Size in Bits</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e4845" rowspan="1" colspan="1">PTX Type </th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" rowspan="2" valign="top" width="33.33333333333333%" headers="d54e4839" colspan="1">Integer types</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">&lt;= 32</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4845" rowspan="1" colspan="1">
                                       <p class="p"><samp class="ph codeph">.u32</samp> or <samp class="ph codeph">.b32</samp> (zero-extended if unsigned)
                                       </p>
                                       <p class="p"><samp class="ph codeph">.s32</samp> or <samp class="ph codeph">.b32</samp> (sign-extended if signed)
                                       </p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4845" rowspan="1" colspan="1">
                                       <p class="p"><samp class="ph codeph">.u64</samp> or <samp class="ph codeph">.b64</samp> (if unsigned)
                                       </p>
                                       <p class="p"><samp class="ph codeph">.s64</samp> or <samp class="ph codeph">.b64</samp> (if signed)
                                       </p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" rowspan="2" valign="top" width="33.33333333333333%" headers="d54e4839" colspan="1">Pointer types (without <samp class="ph codeph">byval</samp>
                                       attribute)
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">32</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4845" rowspan="1" colspan="1"><samp class="ph codeph">.u32</samp> or <samp class="ph codeph">.b32</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4845" rowspan="1" colspan="1"><samp class="ph codeph">.u64</samp> or <samp class="ph codeph">.b64</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" rowspan="2" valign="top" width="33.33333333333333%" headers="d54e4839" colspan="1">Floating-point types</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">32</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4845" rowspan="1" colspan="1"><samp class="ph codeph">.f32</samp> or <samp class="ph codeph">.b32</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">64</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4845" rowspan="1" colspan="1"><samp class="ph codeph">.f64</samp> or <samp class="ph codeph">.b64</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4839" rowspan="1" colspan="1">Aggregate types</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">Any size</td>
                                    <td class="entry" rowspan="3" valign="top" width="33.33333333333333%" headers="d54e4845" colspan="1">
                                       <p class="p"><samp class="ph codeph">.align </samp><em class="ph i">align </em><samp class="ph codeph">.b8 </samp><em class="ph i">name</em>[<em class="ph i">size</em>]
                                       </p>
                                       <p class="p">Where <em class="ph i">align</em> is overall aggregate or vector alignment in bytes,
                                          <em class="ph i">name</em> is variable name associated with aggregate or
                                          vector, and <em class="ph i">size</em> is the aggregate or vector size in
                                          bytes.
                                       </p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4839" rowspan="1" colspan="1">Pointer types to aggregate with <samp class="ph codeph">byval</samp>
                                       attribute
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">32 or 64</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4839" rowspan="1" colspan="1">Vector type</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e4842" rowspan="1" colspan="1">Any size</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic reference nested0" id="revision-history"><a name="revision-history" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#revision-history" name="revision-history" shape="rect">A.&nbsp;Revision History</a></h2>
                  <div class="body refbody">
                     <div class="section">
                        <h2 class="title sectiontitle">Version 1.0</h2>
                        <ul class="ul">
                           <li class="li">Initial Release</li>
                        </ul>
                     </div>
                     <div class="section">
                        <h2 class="title sectiontitle">Version 1.1</h2>
                        <ul class="ul">
                           <li class="li">Added support for UVM managed variables in global property annotation. See <a class="xref" href="index.html#supported-properties" shape="rect">Supported Properties</a>.
                           </li>
                        </ul>
                     </div>
                     <div class="section">
                        <h2 class="title sectiontitle">Version 1.2</h2>
                        <ul class="ul">
                           <li class="li">Update to LLVM 3.4 for CUDA 7.0</li>
                           <li class="li">Remove address space intrinsics in favor of <samp class="ph codeph">addrspacecast</samp></li>
                           <li class="li">Add information about source level debugging support</li>
                        </ul>
                     </div>
                     <div class="section">
                        <h2 class="title sectiontitle">Version 1.3</h2>
                        <ul class="ul">
                           <li class="li">Add support for LLVM 3.8 for CUDA 8.0</li>
                        </ul>
                     </div>
                     <div class="section">
                        <h2 class="title sectiontitle">Version 1.4</h2>
                        <ul class="ul">
                           <li class="li">Add support for warp-level intrinsics</li>
                        </ul>
                     </div>
                     <div class="section">
                        <h2 class="title sectiontitle">Version 1.5</h2>
                        <ul class="ul">
                           <li class="li">Add support for LLVM 5.0 for CUDA 9.2</li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication of otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright-past-to-present" name="copyright-past-to-present" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2012</span>-<span class="ph">2019</span> NVIDIA
                              Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script><script type="text/javascript">_satellite.pageBottom();</script></body>
</html>