Sophie

Sophie

distrib > Mageia > 7 > x86_64 > media > nonfree-updates > by-pkgid > b86a85131cc739c1c53d0b55840a4328 > files > 1363

nvidia-cuda-toolkit-devel-10.1.168-1.2.mga7.nonfree.x86_64.rpm

<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <!-- Document metadata: copyright plus DC.* descriptors (type, title, format, identifier). -->
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="Changes from Previous Version"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="changes-from-previous-version"></meta>
      <!-- Stylesheets loaded from the shared ../common/formatting directory. -->
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>NVCC :: CUDA Toolkit Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <!-- Third-party script. The scheme is spelled out as https so the file is never
           requested over plain http and does not resolve to file:// when this page is
           opened from a local install. -->
      <script type="text/javascript" charset="utf-8" src="https://assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"></script>
      <!-- jQuery and its plugins; jquery.min.js is listed first, ahead of the plugin files. -->
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <!-- Scripts from ../search: file lists, search function, English stemmer, and index parts 1 to 3. -->
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <!-- Site banner: company name, site title, and the search form (text field,
           search-scope radio group, reset and submit buttons). The text field has no
           visible label, so aria-label supplies its accessible name. -->
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">CUDA Toolkit Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text" aria-label="Search text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">CUDA Toolkit 
                  
                  
                  v10.1.168</a></div>
            <div class="category"><a href="index.html" title="NVCC">NVCC</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#overview">1.1.&nbsp;Overview</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#cuda-programming-model">1.1.1.&nbsp;CUDA Programming Model</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-sources">1.1.2.&nbsp;CUDA Sources</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#purpose-of-nvcc">1.1.3.&nbsp;Purpose of NVCC</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-host-compilers">1.2.&nbsp;Supported Host Compilers</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#compilation-phases">2.&nbsp;Compilation Phases</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#nvcc-identification-macro">2.1.&nbsp;NVCC Identification Macro</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvcc-phases">2.2.&nbsp;NVCC Phases</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-input-file-suffixes">2.3.&nbsp;Supported Input File Suffixes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#supported-phases">2.4.&nbsp;Supported Phases</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#cuda-compilation-trajectory">3.&nbsp;The CUDA Compilation Trajectory</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#nvcc-command-options">4.&nbsp;NVCC Command Options</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#command-option-types-and-notation">4.1.&nbsp;Command Option Types and Notation</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-option-description">4.2.&nbsp;Command Option Description</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#file-and-path-specifications">4.2.1.&nbsp;File and Path Specifications</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-output-file">4.2.1.1.&nbsp;--output-file file (-o)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-pre-include">4.2.1.2.&nbsp;--pre-include file,... (-include)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-library">4.2.1.3.&nbsp;--library library,... (-l) </a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-define-macro">4.2.1.4.&nbsp;--define-macro def,... (-D)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-undefine-macro">4.2.1.5.&nbsp;--undefine-macro def,... (-U)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-include-path">4.2.1.6.&nbsp;--include-path path,... (-I)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-system-include">4.2.1.7.&nbsp;--system-include path,... (-isystem)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-library-path">4.2.1.8.&nbsp;--library-path path,... (-L)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-output-directory">4.2.1.9.&nbsp;--output-directory directory (-odir)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-dependency-output">4.2.1.10.&nbsp;--dependency-output file (-MF)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-compiler-bindir">4.2.1.11.&nbsp;--compiler-bindir directory (-ccbin)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-cudart">4.2.1.12.&nbsp;--cudart {none|shared|static} (-cudart)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#file-and-path-specifications-libdevice-directory">4.2.1.13.&nbsp;--libdevice-directory directory (-ldir)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#options-for-specifying-compilation-phase">4.2.2.&nbsp;Options for Specifying the Compilation Phase</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-link">4.2.2.1.&nbsp;--link (-link)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-lib">4.2.2.2.&nbsp;--lib (-lib)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-device-link">4.2.2.3.&nbsp;--device-link (-dlink)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-device-c">4.2.2.4.&nbsp;--device-c (-dc)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-device-w">4.2.2.5.&nbsp;--device-w (-dw)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-cuda">4.2.2.6.&nbsp;--cuda (-cuda)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-compile">4.2.2.7.&nbsp;--compile (-c)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-fatbin">4.2.2.8.&nbsp;--fatbin (-fatbin)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-cubin">4.2.2.9.&nbsp;--cubin (-cubin)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-ptx">4.2.2.10.&nbsp;--ptx (-ptx)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-preprocess">4.2.2.11.&nbsp;--preprocess (-E)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-generate-dependencies">4.2.2.12.&nbsp;--generate-dependencies (-M)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-generate-nonsystem-dependencies">4.2.2.13.&nbsp;--generate-nonsystem-dependencies (-MM)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-specifying-compilation-phase-run">4.2.2.14.&nbsp;--run (-run)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior">4.2.3.&nbsp;Options for Specifying Behavior of Compiler/Linker</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-profile">4.2.3.1.&nbsp;--profile (-pg)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-debug">4.2.3.2.&nbsp;--debug (-g)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-device-debug">4.2.3.3.&nbsp;--device-debug (-G)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-extensible-whole-program">4.2.3.4.&nbsp;--extensible-whole-program (-ewp)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-generate-line-info">4.2.3.5.&nbsp;--generate-line-info (-lineinfo)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-optimize">4.2.3.6.&nbsp;--optimize level (-O)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-ftemplate-backtrace-limit">4.2.3.7.&nbsp;--ftemplate-backtrace-limit limit (-ftemplate-backtrace-limit)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-ftemplate-depth">4.2.3.8.&nbsp;--ftemplate-depth limit (-ftemplate-depth)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-shared">4.2.3.9.&nbsp;--shared (-shared)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-x">4.2.3.10.&nbsp;--x {c|c++|cu} (-x)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-std">4.2.3.11.&nbsp;--std {c++03|c++11|c++14} (-std)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-no-host-device-initializer-list">4.2.3.12.&nbsp;--no-host-device-initializer-list (-nohdinitlist)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-expt-relaxed-constexpr">4.2.3.13.&nbsp;--expt-relaxed-constexpr (-expt-relaxed-constexpr)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-expt-extended-lambda">4.2.3.14.&nbsp;--expt-extended-lambda (-expt-extended-lambda)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-altering-compiler-linker-behavior-machine">4.2.3.15.&nbsp;--machine {32|64} (-m)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#options-for-passing-specific-phase-options">4.2.4.&nbsp;Options for Passing Specific Phase Options</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#options-for-passing-specific-phase-options-compiler-options">4.2.4.1.&nbsp;--compiler-options options,... (-Xcompiler)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-passing-specific-phase-options-linker-options">4.2.4.2.&nbsp;--linker-options options,... (-Xlinker)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-passing-specific-phase-options-archive-options">4.2.4.3.&nbsp;--archive-options options,... (-Xarchive)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-passing-specific-phase-options-ptxas-options">4.2.4.4.&nbsp;--ptxas-options options,... (-Xptxas)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-passing-specific-phase-options-nvlink-options">4.2.4.5.&nbsp;--nvlink-options options,... (-Xnvlink)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#options-for-guiding-compiler-driver">4.2.5.&nbsp;Options for Guiding the Compiler Driver</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-dont-use-profile">4.2.5.1.&nbsp;--dont-use-profile (-noprof)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-dryrun">4.2.5.2.&nbsp;--dryrun (-dryrun)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-verbose">4.2.5.3.&nbsp;--verbose (-v)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-keep">4.2.5.4.&nbsp;--keep (-keep)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-keep-dir">4.2.5.5.&nbsp;--keep-dir directory (-keep-dir)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-save-temps">4.2.5.6.&nbsp;--save-temps (-save-temps)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-clean-targets">4.2.5.7.&nbsp;--clean-targets (-clean)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-run-args">4.2.5.8.&nbsp;--run-args arguments,... (-run-args)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-input-drive-prefix">4.2.5.9.&nbsp;--input-drive-prefix prefix (-idp)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-dependency-drive-prefix">4.2.5.10.&nbsp;--dependency-drive-prefix prefix (-ddp)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-drive-prefix">4.2.5.11.&nbsp;--drive-prefix prefix (-dp)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-dependency-target-name">4.2.5.12.&nbsp;--dependency-target-name target (-MT)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-no-align-double">4.2.5.13.&nbsp;--no-align-double</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-guiding-compiler-driver-no-device-link">4.2.5.14.&nbsp;--no-device-link (-nodlink)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#options-for-steering-cuda-compilation">4.2.6.&nbsp;Options for Steering CUDA Compilation</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-cuda-compilation-default-stream">4.2.6.1.&nbsp;--default-stream {legacy|null|per-thread} (-default-stream)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#options-for-steering-gpu-code-generation">4.2.7.&nbsp;Options for Steering GPU Code Generation</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-gpu-architecture">4.2.7.1.&nbsp;--gpu-architecture arch (-arch)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-gpu-code">4.2.7.2.&nbsp;--gpu-code code,... (-code)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-generate-code">4.2.7.3.&nbsp;--generate-code specification (-gencode)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-relocatable-device-code">4.2.7.4.&nbsp;--relocatable-device-code {true|false} (-rdc)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-entries">4.2.7.5.&nbsp;--entries entry,... (-e)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-maxrregcount">4.2.7.6.&nbsp;--maxrregcount amount (-maxrregcount)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-use_fast_math">4.2.7.7.&nbsp;--use_fast_math (-use_fast_math)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-ftz">4.2.7.8.&nbsp;--ftz {true|false} (-ftz)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-prec-div">4.2.7.9.&nbsp;--prec-div {true|false} (-prec-div)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-prec-sqrt">4.2.7.10.&nbsp;--prec-sqrt {true|false} (-prec-sqrt)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#options-for-steering-gpu-code-generation-fmad">4.2.7.11.&nbsp;--fmad {true|false} (-fmad)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#generic-tool-options">4.2.8.&nbsp;Generic Tool Options</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-disable-warnings">4.2.8.1.&nbsp;--disable-warnings (-w)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-source-in-ptx">4.2.8.2.&nbsp;--source-in-ptx (-src-in-ptx)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-restrict">4.2.8.3.&nbsp;--restrict (-restrict)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-Wno-deprecated-gpu-targets">4.2.8.4.&nbsp;--Wno-deprecated-gpu-targets (-Wno-deprecated-gpu-targets)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-Wno-deprecated-declarations">4.2.8.5.&nbsp;--Wno-deprecated-declarations (-Wno-deprecated-declarations)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-Wreorder">4.2.8.6.&nbsp;--Wreorder (-Wreorder)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-Werror">4.2.8.7.&nbsp;--Werror kind,... (-Werror)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-resource-usage">4.2.8.8.&nbsp;--resource-usage (-res-usage)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-help">4.2.8.9.&nbsp;--help (-h)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-version">4.2.8.10.&nbsp;--version (-V)</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#generic-tool-options-options-file">4.2.8.11.&nbsp;--options-file file,... (-optf)</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#phase-options">4.2.9.&nbsp;Phase Options</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#ptxas-options">4.2.9.1.&nbsp;Ptxas Options</a></div>
                                    <ul>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-allow-expensive-optimizations">4.2.9.1.1.&nbsp;--allow-expensive-optimizations (-allow-expensive-optimizations)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-compile-only">4.2.9.1.2.&nbsp;--compile-only (-c)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-def-load-cache">4.2.9.1.3.&nbsp;--def-load-cache (-dlcm)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-def-store-cache">4.2.9.1.4.&nbsp;--def-store-cache (-dscm)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-device-debug">4.2.9.1.5.&nbsp;--device-debug (-g)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-disable-optimizer-constants">4.2.9.1.6.&nbsp;--disable-optimizer-constants (-disable-optimizer-consts)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-entry">4.2.9.1.7.&nbsp;--entry entry,... (-e)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-fmad">4.2.9.1.8.&nbsp;--fmad (-fmad)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-force-load-cache">4.2.9.1.9.&nbsp;--force-load-cache (-flcm)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-force-store-cache">4.2.9.1.10.&nbsp;--force-store-cache (-fscm)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-generate-line-info">4.2.9.1.11.&nbsp;--generate-line-info (-lineinfo)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-gpu-name">4.2.9.1.12.&nbsp;--gpu-name gpuname (-arch)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-help">4.2.9.1.13.&nbsp;--help (-h)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-machine">4.2.9.1.14.&nbsp;--machine (-m)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-maxrregcount">4.2.9.1.15.&nbsp;--maxrregcount amount (-maxrregcount)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-opt-level">4.2.9.1.16.&nbsp;--opt-level N (-O)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-options-file">4.2.9.1.17.&nbsp;--options-file file,... (-optf)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-output-file">4.2.9.1.18.&nbsp;--output-file file (-o)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-preserve-relocs">4.2.9.1.19.&nbsp;--preserve-relocs (-preserve-relocs)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-sp-bound-check">4.2.9.1.20.&nbsp;--sp-bound-check (-sp-bound-check)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-verbose">4.2.9.1.21.&nbsp;--verbose (-v)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-version">4.2.9.1.22.&nbsp;--version (-V)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-warning-as-error">4.2.9.1.23.&nbsp;--warning-as-error (-Werror)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-warn-on-double-precision-use">4.2.9.1.24.&nbsp;--warn-on-double-precision-use (-warn-double-usage)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-warn-on-local-memory-usage">4.2.9.1.25.&nbsp;--warn-on-local-memory-usage (-warn-lmem-usage)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#ptxas-options-warn-on-spills">4.2.9.1.26.&nbsp;--warn-on-spills (-warn-spills)</a></div>
                                       </li>
                                    </ul>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#nvlink-options">4.2.9.2.&nbsp;NVLINK Options</a></div>
                                    <ul>
                                       <li>
                                          <div class="section-link"><a href="#nvlink-options-disable-warnings">4.2.9.2.1.&nbsp;--disable-warnings (-w)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#nvlink-options-preserve-relocs">4.2.9.2.2.&nbsp;--preserve-relocs (-preserve-relocs)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#nvlink-options-verbose">4.2.9.2.3.&nbsp;--verbose (-v)</a></div>
                                       </li>
                                       <li>
                                          <div class="section-link"><a href="#nvlink-options-warning-as-error">4.2.9.2.4.&nbsp;--warning-as-error (-Werror)</a></div>
                                       </li>
                                    </ul>
                                 </li>
                              </ul>
                           </li>
                        </ul>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#gpu-compilation">5.&nbsp;GPU Compilation</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#gpu-generations">5.1.&nbsp;GPU Generations</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#gpu-feature-list">5.2.&nbsp;GPU Feature List</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#application-compatibility">5.3.&nbsp;Application Compatibility</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#virtual-architectures">5.4.&nbsp;Virtual Architectures</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#virtual-architecture-feature-list">5.5.&nbsp;Virtual Architecture Feature List</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#further-mechanisms">5.6.&nbsp;Further Mechanisms</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#just-in-time-compilation">5.6.1.&nbsp;Just-in-Time Compilation</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#fatbinaries">5.6.2.&nbsp;Fatbinaries</a></div>
                           </li>
                        </ul>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvcc-examples">5.7.&nbsp;NVCC Examples</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#base-notation">5.7.1.&nbsp;Base Notation</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#shorthand">5.7.2.&nbsp;Shorthand</a></div>
                              <ul>
                                 <li>
                                    <div class="section-link"><a href="#shorthand-1">5.7.2.1.&nbsp;Shorthand 1</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#shorthand-2">5.7.2.2.&nbsp;Shorthand 2</a></div>
                                 </li>
                                 <li>
                                    <div class="section-link"><a href="#shorthand-3">5.7.2.3.&nbsp;Shorthand 3</a></div>
                                 </li>
                              </ul>
                           </li>
                           <li>
                              <div class="section-link"><a href="#extended-notation">5.7.3.&nbsp;Extended Notation</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#virtual-architecture-identification-macro">5.7.4.&nbsp;Virtual Architecture Identification Macro</a></div>
                           </li>
                        </ul>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#using-separate-compilation-in-cuda">6.&nbsp;Using Separate Compilation in CUDA</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#code-changes-for-separate-compilation">6.1.&nbsp;Code Changes for Separate Compilation</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvcc-options-for-separate-compilation">6.2.&nbsp;NVCC Options for Separate Compilation</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#libraries">6.3.&nbsp;Libraries</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#examples">6.4.&nbsp;Examples</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#potential-separate-compilation-issues">6.5.&nbsp;Potential Separate Compilation Issues</a></div>
                        <ul>
                           <li>
                              <div class="section-link"><a href="#object-compatibility">6.5.1.&nbsp;Object Compatibility</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#jit-linking-not-supported">6.5.2.&nbsp;JIT Linking Support</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#implicit-cuda-host-code">6.5.3.&nbsp;Implicit CUDA Host Code</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#cuda-arch">6.5.4.&nbsp;Using __CUDA_ARCH__</a></div>
                           </li>
                           <li>
                              <div class="section-link"><a href="#library-device-code">6.5.5.&nbsp;Device Code in Libraries</a></div>
                           </li>
                        </ul>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#miscellaneous-nvcc-usage">7.&nbsp;Miscellaneous NVCC Usage</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#cross-compilation">7.1.&nbsp;Cross Compilation</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#keeping-intermediate-phase-files">7.2.&nbsp;Keeping Intermediate Phase Files</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#cleaning-generated-files">7.3.&nbsp;Cleaning Up Generated Files</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#printing-code-generation-statistics">7.4.&nbsp;Printing Code Generation Statistics</a></div>
                     </li>
                  </ul>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">NVCC
                  (<a href="../../pdf/CUDA_Compiler_Driver_NVCC.pdf">PDF</a>)
                  -
                   
                  
                  
                  v10.1.168
                  (<a href="https://developer.nvidia.com/cuda-toolkit-archive">older</a>)
                  -
                  Last updated April 24, 2019
                  -
                  <a href="mailto:CUDAIssues@nvidia.com?subject=CUDA%20Toolkit%20Documentation%20Feedback:%20NVCC">Send Feedback</a></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="changes-from-previous-version"><a name="changes-from-previous-version" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#changes-from-previous-version" name="changes-from-previous-version" shape="rect">Changes from Previous Version</a></h2>
                  <div class="body conbody">
                     <ul class="ul">
                        <li class="li">
                           Major update to the document to reflect recent <samp class="ph codeph">nvcc</samp>
                           changes.
                           
                        </li>
                     </ul>
                  </div>
               </div>
               <div class="topic concept nested0" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#introduction" name="introduction" shape="rect">1.&nbsp;Introduction</a></h2>
                  <div class="topic concept nested1" id="overview"><a name="overview" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#overview" name="overview" shape="rect">1.1.&nbsp;Overview</a></h3>
                     <div class="topic concept nested2" id="cuda-programming-model"><a name="cuda-programming-model" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-programming-model" name="cuda-programming-model" shape="rect">1.1.1.&nbsp;CUDA Programming Model</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              The CUDA Toolkit targets a class of applications whose control part runs
                              as a process on a general purpose computing device, and which use one or
                              more NVIDIA GPUs as coprocessors for accelerating
                              <em class="ph i">single program, multiple data</em> (SPMD) parallel jobs.
                              Such jobs are self-contained, in the sense that they can be executed and
                              completed by a batch of GPU threads entirely without intervention by the
                              host process, thereby gaining optimal benefit from the parallel graphics
                              hardware.
                              
                           </p>
                           <p class="p">
                              The GPU code is implemented as a collection of functions in a language
                              that is essentially C++, but with some annotations for distinguishing
                              them from the host code, plus annotations for distinguishing different
                              types of data memory that exist on the GPU.
                              Such functions may have parameters, and they can be called using a
                              syntax that is very similar to regular C function calling, but slightly
                              extended for being able to specify the matrix of GPU threads that must
                              execute the called function.
                              During its lifetime, the host process may dispatch many parallel GPU
                              tasks.
                              
                           </p>
                           <p class="p">
                              For more information on the CUDA programming model, consult the
                              <a class="xref" href="http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html" target="_blank" shape="rect">
                                 CUDA C Programming Guide</a>.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-sources"><a name="cuda-sources" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-sources" name="cuda-sources" shape="rect">1.1.2.&nbsp;CUDA Sources</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              Source files for CUDA applications consist of a mixture of conventional
                              C++ host code, plus GPU device functions.
                              The CUDA compilation trajectory separates the device functions from the
                              host code, compiles the device functions using the proprietary NVIDIA
                              compilers and assembler, compiles the host code using a C++ host
                              compiler that is available, and afterwards embeds the compiled GPU
                              functions as fatbinary images in the host object file.
                              In the linking stage, specific CUDA runtime libraries are added for
                              supporting remote SPMD procedure calling and for providing explicit GPU
                              manipulation such as allocation of GPU memory buffers and host-GPU data
                              transfer.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="purpose-of-nvcc"><a name="purpose-of-nvcc" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#purpose-of-nvcc" name="purpose-of-nvcc" shape="rect">1.1.3.&nbsp;Purpose of NVCC</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              The compilation trajectory involves several splitting, compilation,
                              preprocessing, and merging steps for each CUDA source file.
                              It is the purpose of <samp class="ph codeph">nvcc</samp>, the CUDA compiler driver, to
                              hide the intricate details of CUDA compilation from developers.
                              It accepts a range of conventional compiler options, such as for
                              defining macros and include/library paths, and for steering the
                              compilation process.
                              All non-CUDA compilation steps are forwarded to a C++ host compiler that
                              is supported by <samp class="ph codeph">nvcc</samp>, and <samp class="ph codeph">nvcc</samp>
                              translates its options to appropriate host compiler command line
                              options.
                              
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-host-compilers"><a name="supported-host-compilers" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-host-compilers" name="supported-host-compilers" shape="rect">1.2.&nbsp;Supported Host Compilers</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           A general purpose C++ host compiler is needed by <samp class="ph codeph">nvcc</samp>
                           in the following situations:
                           
                           <ul class="ul">
                              <li class="li">
                                 During non-CUDA phases (except the run phase), because these phases
                                 will be forwarded by <samp class="ph codeph">nvcc</samp> to this compiler.
                                 
                              </li>
                              <li class="li">
                                 During CUDA phases, for several preprocessing stages and host code
                                 compilation (see also <a class="xref" href="index.html#cuda-compilation-trajectory" shape="rect">The CUDA Compilation Trajectory</a>).
                                 
                              </li>
                           </ul>
                        </div>
                        <p class="p"><samp class="ph codeph">nvcc</samp> assumes that the host compiler is installed with
                           the standard method designed by the compiler provider.
                           If the host compiler installation is non-standard, the user must make
                           sure that the environment is set appropriately and use relevant
                           <samp class="ph codeph">nvcc</samp> compile options.
                           
                        </p>
                        <div class="p">
                           The following documents provide detailed information about supported
                           host compilers:
                           
                           <ul class="ul">
                              <li class="li"><a class="xref" href="http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html" target="_blank" shape="rect">
                                    NVIDIA CUDA Installation Guide for Linux
                                    </a></li>
                              <li class="li"><a class="xref" href="http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html" target="_blank" shape="rect">
                                    NVIDIA CUDA Installation Guide for Mac OS X
                                    </a></li>
                              <li class="li"><a class="xref" href="http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html" target="_blank" shape="rect">
                                    NVIDIA CUDA Installation Guide for Microsoft Windows
                                    </a></li>
                           </ul>
                        </div>
                        <p class="p">
                           On all platforms, the default host compiler executable
                           (<samp class="ph codeph">gcc</samp> and <samp class="ph codeph">g++</samp> on Linux,
                           <samp class="ph codeph">clang</samp> and <samp class="ph codeph">clang++</samp> on Mac OS X, and
                           <samp class="ph codeph">cl.exe</samp> on Windows) found in the current execution
                           search path will be used, unless specified otherwise with appropriate
                           options (see <a class="xref" href="index.html#file-and-path-specifications" shape="rect">File and Path Specifications</a>).
                           
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="compilation-phases"><a name="compilation-phases" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#compilation-phases" name="compilation-phases" shape="rect">2.&nbsp;Compilation Phases</a></h2>
                  <div class="topic concept nested1" id="nvcc-identification-macro"><a name="nvcc-identification-macro" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvcc-identification-macro" name="nvcc-identification-macro" shape="rect">2.1.&nbsp;NVCC Identification Macro</a></h3>
                     <div class="body conbody">
                        <div class="p"><samp class="ph codeph">nvcc</samp> predefines the following macros:
                           
                           <dl class="dl">
                              <dt class="dt dlterm"><samp class="ph codeph">__NVCC__</samp></dt>
                              <dd class="dd">Defined when compiling C/C++/CUDA source files.</dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC__</samp></dt>
                              <dd class="dd">Defined when compiling CUDA source files.</dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_RDC__</samp></dt>
                              <dd class="dd">
                                 Defined when compiling CUDA source files in relocatable device
                                 code mode (see
                                 <a class="xref" href="index.html#nvcc-options-for-separate-compilation" shape="rect">NVCC Options for Separate Compilation</a>).
                                 
                              </dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_DEBUG__</samp></dt>
                              <dd class="dd">
                                 Defined when compiling CUDA source files in the device-debug mode
                                 (see
                                 <a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">Options for Specifying Behavior of Compiler/Linker</a>).
                                 
                              </dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_RELAXED_CONSTEXPR__</samp></dt>
                              <dd class="dd">
                                 Defined when the <samp class="ph codeph">--expt-relaxed-constexpr</samp> flag is specified
                                 on the command line. Refer to 
                                 <a class="xref" href="http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html" target="_blank" shape="rect">CUDA C Programming Guide
                                    </a> for more details.
                                 
                              </dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_EXTENDED_LAMBDA__</samp></dt>
                              <dd class="dd">
                                 Defined when the <samp class="ph codeph">--expt-extended-lambda</samp> flag is specified
                                 on the command line. Refer to 
                                 <a class="xref" href="http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html" target="_blank" shape="rect">CUDA C Programming Guide
                                    </a> for more details.
                                 
                              </dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_VER_MAJOR__</samp></dt>
                              <dd class="dd">
                                 Defined with the major version number of <samp class="ph codeph">nvcc</samp>.
                                 
                              </dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_VER_MINOR__</samp></dt>
                              <dd class="dd">
                                 Defined with the minor version number of <samp class="ph codeph">nvcc</samp>.
                                 
                              </dd>
                              <dt class="dt dlterm"><samp class="ph codeph">__CUDACC_VER_BUILD__</samp></dt>
                              <dd class="dd">
                                 Defined with the build version number of <samp class="ph codeph">nvcc</samp>.
                                 
                              </dd>
                           </dl>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvcc-phases"><a name="nvcc-phases" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvcc-phases" name="nvcc-phases" shape="rect">2.2.&nbsp;NVCC Phases</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           A compilation phase is a logical translation step that can be
                           selected by command line options to <samp class="ph codeph">nvcc</samp>.
                           A single compilation phase can still be broken up by
                           <samp class="ph codeph">nvcc</samp> into smaller steps, but these smaller steps are
                           just implementations of the phase: they depend on seemingly arbitrary
                           capabilities of the internal tools that <samp class="ph codeph">nvcc</samp> uses, and
                           all of these internals may change with a new release of the CUDA
                           Toolkit.
                           Hence, only compilation phases are stable across releases, and although
                           <samp class="ph codeph">nvcc</samp> provides options to display the compilation steps
                           that it executes, these are for debugging purposes only and must not be
                           copied into and used in build scripts.
                           
                        </p>
                        <p class="p"><samp class="ph codeph">nvcc</samp> phases are selected by a combination of command
                           line options and input file name suffixes, and the execution of these
                           phases may be modified by other command line options.
                           In phase selection, the input file suffix defines the phase input, while
                           the command line option defines the required output of the phase.
                           
                        </p>
                        <p class="p">
                           The following paragraphs will list the recognized file name suffixes and
                           the supported compilation phases.
                           A full explanation of the <samp class="ph codeph">nvcc</samp> command line options can
                           be found in <a class="xref" href="index.html#nvcc-command-options" shape="rect">NVCC Command Options</a>.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-input-file-suffixes"><a name="supported-input-file-suffixes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-input-file-suffixes" name="supported-input-file-suffixes" shape="rect">2.3.&nbsp;Supported Input File Suffixes</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The following table defines how <samp class="ph codeph">nvcc</samp> interprets its
                           input files:
                           
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e383" rowspan="1" colspan="1">Input File Suffix</th>
                                    <th class="entry" valign="top" width="66.66666666666666%" id="d54e386" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.cu</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">
                                       CUDA source file, containing host code and device functions
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.c</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">C source file</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.cc</samp>,
                                       <samp class="ph codeph">.cxx</samp>,
                                       <samp class="ph codeph">.cpp</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">C++ source file</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.ptx</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">
                                       PTX intermediate assembly file (see
                                       <a class="xref" href="index.html#cuda-compilation-trajectory__cuda-compilation-from-cu-to-executable" shape="rect">Figure 1</a>)
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.cubin</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">
                                       CUDA device code binary file (CUBIN) for a single GPU architecture (see
                                       <a class="xref" href="index.html#cuda-compilation-trajectory__cuda-compilation-from-cu-to-executable" shape="rect">Figure 1</a>)
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.fatbin</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">
                                       CUDA fat binary file that may contain multiple PTX and CUBIN
                                       files (see
                                       <a class="xref" href="index.html#cuda-compilation-trajectory__cuda-compilation-from-cu-to-executable" shape="rect">Figure 1</a>)
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.o</samp>, <samp class="ph codeph">.obj</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">Object file</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.a</samp>, <samp class="ph codeph">.lib</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">Library file</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.res</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">Resource file</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e383" rowspan="1" colspan="1"><samp class="ph codeph">.so</samp></td>
                                    <td class="entry" valign="top" width="66.66666666666666%" headers="d54e386" rowspan="1" colspan="1">Shared object file</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">
                           Note that <samp class="ph codeph">nvcc</samp> does not make any distinction
                           between object, library or resource files.
                           It just passes files of these types to the linker when the linking
                           phase is executed.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="supported-phases"><a name="supported-phases" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#supported-phases" name="supported-phases" shape="rect">2.4.&nbsp;Supported Phases</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The following table specifies the supported compilation phases, plus
                           the option to <samp class="ph codeph">nvcc</samp> that enables execution of each
                           phase.
                           It also lists the default name of the output file generated by each
                           phase, which will take effect when no explicit output file name is
                           specified using option
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--output-file</a></samp>:
                           
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" rowspan="2" valign="middle" width="16.666666666666664%" id="d54e561" colspan="1">Phase</th>
                                    <th class="entry" colspan="2" valign="top" id="d54e564" rowspan="1"><samp class="ph codeph">nvcc</samp> Option
                                       
                                    </th>
                                    <th class="entry" rowspan="2" valign="middle" width="50%" id="d54e570" colspan="1">
                                       Default Output File Name
                                       
                                    </th>
                                 </tr>
                                 <tr class="row">
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e576" rowspan="1" colspan="1">Long Name</th>
                                    <th class="entry" valign="top" width="16.666666666666664%" id="d54e579" rowspan="1" colspan="1">Short Name</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">CUDA compilation to C/C++ source file</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--cuda</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-cuda</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1"><samp class="ph codeph">.cpp.ii</samp> appended to source file name, as in
                                       <samp class="ph codeph"><em class="ph i">x</em>.cu.cpp.ii</samp>.
                                       This output file can be compiled by the host compiler that was
                                       used by <samp class="ph codeph">nvcc</samp> to preprocess the
                                       <samp class="ph codeph">.cu</samp> file.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">C/C++ preprocessing</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--preprocess</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-E</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">&lt;<em class="ph i">result on standard output</em>&gt;
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1"> C/C++ compilation to object file </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--compile</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-c</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">
                                       Source file name with suffix replaced by <samp class="ph codeph">o</samp> on
                                       Linux and Mac OS X, or <samp class="ph codeph">obj</samp> on Windows
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">Cubin generation from CUDA source files</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--cubin</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-cubin</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">
                                       Source file name with suffix replaced by
                                       <samp class="ph codeph">cubin</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">
                                       Cubin generation from PTX intermediate
                                       files.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--cubin</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-cubin</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">
                                       Source file name with suffix replaced by
                                       <samp class="ph codeph">cubin</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">PTX generation from CUDA source files</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--ptx</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-ptx</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">
                                       Source file name with suffix replaced by <samp class="ph codeph">ptx</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">
                                       Fatbinary generation from source, PTX or cubin files
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--fatbin</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-fatbin</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">
                                       Source file name with suffix replaced by
                                       <samp class="ph codeph">fatbin</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">Linking relocatable device code.</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--device-link</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-dlink</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1"><samp class="ph codeph">a_dlink.obj</samp> on Windows or
                                       <samp class="ph codeph">a_dlink.o</samp> on other platforms
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">
                                       Cubin generation from linked relocatable device code.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--device-link</a></samp> <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--cubin</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-dlink</a></samp> <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-cubin</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1"><samp class="ph codeph">a_dlink.cubin</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">
                                       Fatbinary generation from linked relocatable device code
                                       
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--device-link</a></samp> <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--fatbin</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-dlink</a></samp> <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-fatbin</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1"><samp class="ph codeph">a_dlink.fatbin</samp></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">Linking an executable</td>
                                    <td class="entry" colspan="2" valign="top" headers="d54e564 d54e579" rowspan="1">
                                       &lt;<em class="ph i">no phase option</em>&gt;
                                       
                                    </td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1"><samp class="ph codeph">a.exe</samp> on Windows or <samp class="ph codeph">a.out</samp>
                                       on other platforms
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">Constructing an object file archive, or library</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--lib</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-lib</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1"><samp class="ph codeph">a.lib</samp> on Windows or <samp class="ph codeph">a.a</samp> on
                                       other platforms
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1"><samp class="ph codeph">make</samp> dependency generation
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-dependencies</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-M</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">&lt;<em class="ph i">result on standard output</em>&gt;
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1"><samp class="ph codeph">make</samp> dependency generation without headers in system paths.
                                    </td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-nonsystem-dependencies</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-MM</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">&lt;<em class="ph i">result on standard output</em>&gt;
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e561 d54e576" rowspan="1" colspan="1">Running an executable</td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564 d54e579" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--run</a></samp></td>
                                    <td class="entry" valign="top" width="16.666666666666664%" headers="d54e564" rowspan="1" colspan="1"><samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">-run</a></samp></td>
                                    <td class="entry" valign="top" width="50%" headers="d54e570" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <div class="p"><strong class="ph b">Notes:</strong><ul class="ul">
                              <li class="li">
                                 The last phase in this list is more of a convenience phase.
                                 It allows running the compiled and linked executable without
                                 having to explicitly set the library path to the CUDA dynamic
                                 libraries.
                                 
                              </li>
                              <li class="li">
                                 Unless a phase option is specified, <samp class="ph codeph">nvcc</samp> will
                                 compile and link all its input files.
                                 
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic reference nested0" id="cuda-compilation-trajectory"><a name="cuda-compilation-trajectory" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#cuda-compilation-trajectory" name="cuda-compilation-trajectory" shape="rect">3.&nbsp;The CUDA Compilation Trajectory</a></h2>
                  <div class="body refbody">
                     <div class="section">
                        <p class="p">
                           CUDA compilation works as follows: the input program is
                           preprocessed for device compilation and is compiled to CUDA
                           binary (<samp class="ph codeph">cubin</samp>) and/or PTX intermediate code, which are
                           placed in a fatbinary.
                           The input program is preprocessed once again for host compilation and is
                           synthesized to embed the fatbinary and transform CUDA specific C++
                           extensions into standard C++ constructs.
                           Then the C++ host compiler compiles the synthesized host code with the
                           embedded fatbinary into a host object.
                           The exact steps that are followed to achieve this are displayed in
                           <a class="xref" href="index.html#cuda-compilation-trajectory__cuda-compilation-from-cu-to-executable" shape="rect">Figure 1</a>.
                           
                        </p>
                        <p class="p">
                           The embedded fatbinary is inspected by the CUDA runtime system whenever
                           the device code is launched by the host program to obtain an appropriate
                           fatbinary image for the current GPU.
                           
                        </p>
                        <p class="p">
                           CUDA programs are compiled in the whole program compilation mode by
                           default, i.e., the device code cannot reference an entity from a
                           separate file.
                           In the whole program compilation mode, device link steps have no effect.
                           For more information on separate compilation and whole program
                           compilation, see
                           <a class="xref" href="index.html#using-separate-compilation-in-cuda" shape="rect">Using Separate Compilation in CUDA</a>.
                           
                        </p>
                        <div class="fig fignone" id="cuda-compilation-trajectory__cuda-compilation-from-cu-to-executable"><a name="cuda-compilation-trajectory__cuda-compilation-from-cu-to-executable" shape="rect">
                              <!-- --></a><span class="figcap">Figure 1. CUDA Compilation Trajectory</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/cuda-compilation-from-cu-to-executable.png" alt="CUDA Compilation from .cu to an executable"></img></div><br clear="none"></br></div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="nvcc-command-options"><a name="nvcc-command-options" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#nvcc-command-options" name="nvcc-command-options" shape="rect">4.&nbsp;NVCC Command Options</a></h2>
                  <div class="topic concept nested1" id="command-option-types-and-notation"><a name="command-option-types-and-notation" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-option-types-and-notation" name="command-option-types-and-notation" shape="rect">4.1.&nbsp;Command Option Types and Notation</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           Each <samp class="ph codeph">nvcc</samp> option has a long name and a short name,
                           which are interchangeable with each other.
                           These two variants are distinguished by the number of hyphens that must
                           precede the option name: long names must be preceded by two hyphens,
                           while short names must be preceded by a single hyphen.
                           For example,
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">-I</a></samp>
                           is the short name of
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--include-path</a></samp>.
                           Long options are intended for use in build scripts, where the size of the
                           option is less important than descriptive value.
                           In contrast, short options are intended for interactive use.
                           
                        </p>
                        <p class="p"><samp class="ph codeph">nvcc</samp> recognizes three types of command options: boolean
                           options, single value options, and list options.
                           
                        </p>
                        <p class="p">
                           Boolean options do not have an argument; they are either specified on a
                           command line or not.
                           Single value options must be specified at most once, and list options
                           may be repeated.
                           Examples of each of these option types are, respectively:
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--verbose</a></samp>
                           (switch to verbose mode),
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--output-file</a></samp>
                           (specify output file), and
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--include-path</a></samp>
                           (specify include path).
                           
                        </p>
                        <p class="p">
                           Single value options and list options must have arguments, which must
                           follow the name of the option itself by either one or more spaces or an
                           equals character.
                           When a one-character short name such as
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">-I</a></samp>,
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">-l</a></samp>,
                           and
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">-L</a></samp>
                           is used, the value of the option may also immediately follow the option
                           itself without being separated by spaces or an equals character.
                           The individual values of list options may be separated by commas in a
                           single instance of the option, or the option may be repeated, or any
                           combination of these two cases.
                           
                        </p>
                        <p class="p">
                           Hence, for the two sample options mentioned above that may take values,
                           the following notations are legal:
                           
                        </p><pre class="pre screen" xml:space="preserve">-o file
-o=file
-Idir1,dir2 -I=dir3 -I dir4,dir5</pre><p class="p">
                           Long option names are used throughout the document unless specified
                           otherwise; however, short names can be used instead of long names to have
                           the same effect.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-option-description"><a name="command-option-description" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-option-description" name="command-option-description" shape="rect">4.2.&nbsp;Command Option Description</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           This section presents tables of <samp class="ph codeph">nvcc</samp> options.
                           The option type in the tables can be recognized as follows: boolean
                           options do not have arguments specified in the first column, while the
                           other two types do.
                           List options can be recognized by the repeat indicator
                           <samp class="ph codeph"><em class="ph i">,...</em></samp> at the end of the argument.
                           
                        </p>
                        <p class="p">
                           Long options are described in the first columns of the options tables,
                           and short options occupy the second columns.
                           
                        </p>
                     </div>
                     <div class="topic reference nested2" id="file-and-path-specifications"><a name="file-and-path-specifications" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications" name="file-and-path-specifications" shape="rect">4.2.1.&nbsp;File and Path Specifications</a></h3>
                        <div class="topic reference nested3" id="file-and-path-specifications-output-file"><a name="file-and-path-specifications-output-file" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-output-file" name="file-and-path-specifications-output-file" shape="rect">4.2.1.1.&nbsp;<samp class="ph codeph">--output-file <em class="ph i">file</em></samp> (<samp class="ph codeph">-o</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify name and location of the output file.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-pre-include"><a name="file-and-path-specifications-pre-include" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-pre-include" name="file-and-path-specifications-pre-include" shape="rect">4.2.1.2.&nbsp;<samp class="ph codeph">--pre-include <em class="ph i">file,...</em></samp> (<samp class="ph codeph">-include</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify header files that must be pre-included during
                                       preprocessing.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-library"><a name="file-and-path-specifications-library" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-library" name="file-and-path-specifications-library" shape="rect">4.2.1.3.&nbsp;<samp class="ph codeph">--library <em class="ph i">library,...</em></samp> (<samp class="ph codeph">-l</samp>) </a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify libraries to be used in the linking stage without
                                       the library file extension.
                                       </em></p>
                                 <p class="p">
                                    The libraries are searched for on the library search paths
                                    that have been specified using option
                                    <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--library-path</a></samp>
                                    (see <a class="xref" href="index.html#libraries" shape="rect">Libraries</a>).
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-define-macro"><a name="file-and-path-specifications-define-macro" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-define-macro" name="file-and-path-specifications-define-macro" shape="rect">4.2.1.4.&nbsp;<samp class="ph codeph">--define-macro <em class="ph i">def,...</em></samp> (<samp class="ph codeph">-D</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Define macros to be used during preprocessing.
                                       </em></p>
                                 <div class="p"><em class="ph i">def</em> can be either <em class="ph i">name</em> or <em class="ph i">name</em>=<em class="ph i">definition</em>.
                                    
                                    <ul class="ul">
                                       <li class="li"><em class="ph i">name</em><ul class="ul">
                                             <li class="li">Predefine <em class="ph i">name</em> as a macro.
                                             </li>
                                          </ul>
                                       </li>
                                       <li class="li"><em class="ph i">name</em>=<em class="ph i">definition</em><ul class="ul">
                                             <li class="li">
                                                The contents of <em class="ph i">definition</em> are tokenized and
                                                preprocessed as if they appear during translation phase
                                                three in a <samp class="ph codeph">#define</samp> directive.
                                                The definition will be truncated by embedded new line
                                                characters.
                                                
                                             </li>
                                          </ul>
                                       </li>
                                    </ul>
                                 </div>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-undefine-macro"><a name="file-and-path-specifications-undefine-macro" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-undefine-macro" name="file-and-path-specifications-undefine-macro" shape="rect">4.2.1.5.&nbsp;<samp class="ph codeph">--undefine-macro <em class="ph i">def,...</em></samp> (<samp class="ph codeph">-U</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Undefine an existing macro during preprocessing or
                                       compilation.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-include-path"><a name="file-and-path-specifications-include-path" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-include-path" name="file-and-path-specifications-include-path" shape="rect">4.2.1.6.&nbsp;<samp class="ph codeph">--include-path <em class="ph i">path,...</em></samp> (<samp class="ph codeph">-I</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify include search paths.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-system-include"><a name="file-and-path-specifications-system-include" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-system-include" name="file-and-path-specifications-system-include" shape="rect">4.2.1.7.&nbsp;<samp class="ph codeph">--system-include <em class="ph i">path,...</em></samp> (<samp class="ph codeph">-isystem</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify system include search paths.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-library-path"><a name="file-and-path-specifications-library-path" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-library-path" name="file-and-path-specifications-library-path" shape="rect">4.2.1.8.&nbsp;<samp class="ph codeph">--library-path <em class="ph i">path,...</em></samp> (<samp class="ph codeph">-L</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify library search paths (see <a class="xref" href="index.html#libraries" shape="rect">Libraries</a>).
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-output-directory"><a name="file-and-path-specifications-output-directory" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-output-directory" name="file-and-path-specifications-output-directory" shape="rect">4.2.1.9.&nbsp;<samp class="ph codeph">--output-directory <em class="ph i">directory</em></samp> (<samp class="ph codeph">-odir</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the directory of the output file.
                                       </em></p>
                                 <p class="p">
                                    This option is intended for letting the dependency generation
                                    step (see
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-dependencies</a></samp>)
                                    generate a rule that defines the target object file in the
                                    proper directory.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-dependency-output"><a name="file-and-path-specifications-dependency-output" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-dependency-output" name="file-and-path-specifications-dependency-output" shape="rect">4.2.1.10.&nbsp;<samp class="ph codeph">--dependency-output <em class="ph i">file</em></samp> (<samp class="ph codeph">-MF</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the dependency output file.
                                       </em></p>
                                 <p class="p">
                                    This option specifies the output file for the dependency generation step (see
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-dependencies</a></samp>).
                                    The option <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-dependencies</a></samp>
                                    or <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-nonsystem-dependencies</a></samp>
                                    must be specified if a dependency output file is set.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-compiler-bindir"><a name="file-and-path-specifications-compiler-bindir" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-compiler-bindir" name="file-and-path-specifications-compiler-bindir" shape="rect">4.2.1.11.&nbsp;<samp class="ph codeph">--compiler-bindir <em class="ph i">directory</em></samp> (<samp class="ph codeph">-ccbin</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the directory in which the compiler executable
                                       resides.
                                       </em></p>
                                 <p class="p">
                                    The host compiler executable name can also be specified to
                                    ensure that the correct host compiler is selected.
                                    In addition, driver prefix options
                                    (<samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--input-drive-prefix</a></samp>,
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--dependency-drive-prefix</a></samp>,
                                    or
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--drive-prefix</a></samp>)
                                    may need to be specified, if <samp class="ph codeph">nvcc</samp> is executed
                                    in a Cygwin shell or a MinGW shell on Windows.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-cudart"><a name="file-and-path-specifications-cudart" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-cudart" name="file-and-path-specifications-cudart" shape="rect">4.2.1.12.&nbsp;<samp class="ph codeph">--cudart</samp> {<samp class="ph codeph">none</samp>|<samp class="ph codeph">shared</samp>|<samp class="ph codeph">static</samp>} (<samp class="ph codeph">-cudart</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the type of CUDA runtime library to be used: no CUDA
                                       runtime library, shared/dynamic CUDA runtime library, or
                                       static CUDA runtime library.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">none</samp></li>
                                    <li class="li"><samp class="ph codeph">shared</samp></li>
                                    <li class="li"><samp class="ph codeph">static</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    The static CUDA runtime library is used by default.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="file-and-path-specifications-libdevice-directory"><a name="file-and-path-specifications-libdevice-directory" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#file-and-path-specifications-libdevice-directory" name="file-and-path-specifications-libdevice-directory" shape="rect">4.2.1.13.&nbsp;<samp class="ph codeph">--libdevice-directory <em class="ph i">directory</em></samp> (<samp class="ph codeph">-ldir</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the directory that contains the libdevice library
                                       files.
                                       </em></p>
                                 <p class="p">
                                    Libdevice library files are located in the
                                    <samp class="ph codeph">nvvm/libdevice</samp> directory in the CUDA Toolkit.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="options-for-specifying-compilation-phase"><a name="options-for-specifying-compilation-phase" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase" name="options-for-specifying-compilation-phase" shape="rect">4.2.2.&nbsp;Options for Specifying the Compilation Phase</a></h3>
                        <div class="body refbody">
                           <div class="section">
                              <p class="p">
                                 Options of this category specify up to which stage the input files
                                 must be compiled.
                                 
                              </p>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-link"><a name="options-for-specifying-compilation-phase-link" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-link" name="options-for-specifying-compilation-phase-link" shape="rect">4.2.2.1.&nbsp;<samp class="ph codeph">--link</samp> (<samp class="ph codeph">-link</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the default behavior: compile and link all input files.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p"><samp class="ph codeph">a.exe</samp> on Windows or
                                    <samp class="ph codeph">a.out</samp> on other platforms
                                    is used as the default output file name.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-lib"><a name="options-for-specifying-compilation-phase-lib" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-lib" name="options-for-specifying-compilation-phase-lib" shape="rect">4.2.2.2.&nbsp;<samp class="ph codeph">--lib</samp> (<samp class="ph codeph">-lib</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile all input files into object files, if necessary,
                                       and add the results to the specified library output file. 
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p"><samp class="ph codeph">a.lib</samp> on Windows or
                                    <samp class="ph codeph">a.a</samp> on other platforms
                                    is used as the default output file name.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-device-link"><a name="options-for-specifying-compilation-phase-device-link" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-device-link" name="options-for-specifying-compilation-phase-device-link" shape="rect">4.2.2.3.&nbsp;<samp class="ph codeph">--device-link</samp> (<samp class="ph codeph">-dlink</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Link object files with relocatable device code and
                                       <samp class="ph codeph">.ptx</samp>, <samp class="ph codeph">.cubin</samp>, and
                                       <samp class="ph codeph">.fatbin</samp>
                                       files into an object file with executable device code, which
                                       can be passed to the host linker.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p"><samp class="ph codeph">a_dlink.obj</samp> on Windows or
                                    <samp class="ph codeph">a_dlink.o</samp> on other platforms
                                    is used as the default output file name.
                                    When this option is used in conjunction with
                                    <samp class="ph codeph">--fatbin</samp>, <samp class="ph codeph">a_dlink.fatbin</samp> is used
                                    as the default output file name.
                                    When this option is used in conjunction with
                                    <samp class="ph codeph">--cubin</samp>, <samp class="ph codeph">a_dlink.cubin</samp> is used
                                    as the default output file name.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-device-c"><a name="options-for-specifying-compilation-phase-device-c" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-device-c" name="options-for-specifying-compilation-phase-device-c" shape="rect">4.2.2.4.&nbsp;<samp class="ph codeph">--device-c</samp> (<samp class="ph codeph">-dc</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile each
                                       <samp class="ph codeph">.c</samp>, <samp class="ph codeph">.cc</samp>, <samp class="ph codeph">.cpp</samp>,
                                       <samp class="ph codeph">.cxx</samp>, and <samp class="ph codeph">.cu</samp>
                                       input file into an object file that contains relocatable
                                       device code.
                                       </em></p>
                                 <p class="p">
                                    It is equivalent to
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--relocatable-device-code</a>=true
                                       <a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--compile</a></samp>.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The source file name extension is replaced by <samp class="ph codeph">.obj</samp>
                                    on Windows and <samp class="ph codeph">.o</samp> on other platforms
                                    to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.obj</samp> on Windows and <samp class="ph codeph">x.o</samp> on
                                    other platforms.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-device-w"><a name="options-for-specifying-compilation-phase-device-w" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-device-w" name="options-for-specifying-compilation-phase-device-w" shape="rect">4.2.2.5.&nbsp;<samp class="ph codeph">--device-w</samp> (<samp class="ph codeph">-dw</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile each
                                       <samp class="ph codeph">.c</samp>, <samp class="ph codeph">.cc</samp>, <samp class="ph codeph">.cpp</samp>,
                                       <samp class="ph codeph">.cxx</samp>, and <samp class="ph codeph">.cu</samp>
                                       input file into an object file that contains executable device
                                       code.
                                       </em></p>
                                 <p class="p">
                                    It is equivalent to
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--relocatable-device-code</a>=false
                                       <a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--compile</a></samp>.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The source file name extension is replaced by <samp class="ph codeph">.obj</samp>
                                    on Windows and <samp class="ph codeph">.o</samp> on other platforms
                                    to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.obj</samp> on Windows and <samp class="ph codeph">x.o</samp> on
                                    other platforms.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-cuda"><a name="options-for-specifying-compilation-phase-cuda" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-cuda" name="options-for-specifying-compilation-phase-cuda" shape="rect">4.2.2.6.&nbsp;<samp class="ph codeph">--cuda</samp> (<samp class="ph codeph">-cuda</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile each <samp class="ph codeph">.cu</samp> input file to a
                                       <samp class="ph codeph">.cu.cpp.ii</samp> file.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p"><samp class="ph codeph">.cu.cpp.ii</samp> is appended to the basename of the
                                    source file name to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.cu.cpp.ii</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-compile"><a name="options-for-specifying-compilation-phase-compile" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-compile" name="options-for-specifying-compilation-phase-compile" shape="rect">4.2.2.7.&nbsp;<samp class="ph codeph">--compile</samp> (<samp class="ph codeph">-c</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile each
                                       <samp class="ph codeph">.c</samp>, <samp class="ph codeph">.cc</samp>, <samp class="ph codeph">.cpp</samp>,
                                       <samp class="ph codeph">.cxx</samp>, and <samp class="ph codeph">.cu</samp> input file into
                                       an object file.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The source file name extension is replaced by <samp class="ph codeph">.obj</samp>
                                    on Windows and <samp class="ph codeph">.o</samp> on other platforms
                                    to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.obj</samp> on Windows and <samp class="ph codeph">x.o</samp> on
                                    other platforms.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-fatbin"><a name="options-for-specifying-compilation-phase-fatbin" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-fatbin" name="options-for-specifying-compilation-phase-fatbin" shape="rect">4.2.2.8.&nbsp;<samp class="ph codeph">--fatbin</samp> (<samp class="ph codeph">-fatbin</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile all
                                       <samp class="ph codeph">.cu</samp>, <samp class="ph codeph">.ptx</samp>, and
                                       <samp class="ph codeph">.cubin</samp> input files to device-only
                                       <samp class="ph codeph">.fatbin</samp> files.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">nvcc</samp> discards the host code for
                                    each <samp class="ph codeph">.cu</samp> input file with this
                                    option.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The source file name extension is replaced by <samp class="ph codeph">.fatbin</samp>
                                    to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.fatbin</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-cubin"><a name="options-for-specifying-compilation-phase-cubin" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-cubin" name="options-for-specifying-compilation-phase-cubin" shape="rect">4.2.2.9.&nbsp;<samp class="ph codeph">--cubin</samp> (<samp class="ph codeph">-cubin</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile all
                                       <samp class="ph codeph">.cu</samp> and <samp class="ph codeph">.ptx</samp>
                                       input files to device-only <samp class="ph codeph">.cubin</samp> files.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">nvcc</samp> discards the host code for
                                    each <samp class="ph codeph">.cu</samp> input file with this
                                    option.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The source file name extension is replaced by <samp class="ph codeph">.cubin</samp>
                                    to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.cubin</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-ptx"><a name="options-for-specifying-compilation-phase-ptx" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-ptx" name="options-for-specifying-compilation-phase-ptx" shape="rect">4.2.2.10.&nbsp;<samp class="ph codeph">--ptx</samp> (<samp class="ph codeph">-ptx</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile all <samp class="ph codeph">.cu</samp> input files to
                                       device-only <samp class="ph codeph">.ptx</samp> files.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">nvcc</samp> discards the host code for
                                    each <samp class="ph codeph">.cu</samp> input file with this
                                    option.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The source file name extension is replaced by <samp class="ph codeph">.ptx</samp>
                                    to create the default output file name.
                                    For example, the default output file name for <samp class="ph codeph">x.cu</samp>
                                    is <samp class="ph codeph">x.ptx</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-preprocess"><a name="options-for-specifying-compilation-phase-preprocess" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-preprocess" name="options-for-specifying-compilation-phase-preprocess" shape="rect">4.2.2.11.&nbsp;<samp class="ph codeph">--preprocess</samp> (<samp class="ph codeph">-E</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Preprocess all <samp class="ph codeph">.c</samp>, <samp class="ph codeph">.cc</samp>,
                                       <samp class="ph codeph">.cpp</samp>, <samp class="ph codeph">.cxx</samp>,
                                       and <samp class="ph codeph">.cu</samp> input files.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The output is generated in <em class="ph i">stdout</em> by default.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-generate-dependencies"><a name="options-for-specifying-compilation-phase-generate-dependencies" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-generate-dependencies" name="options-for-specifying-compilation-phase-generate-dependencies" shape="rect">4.2.2.12.&nbsp;<samp class="ph codeph">--generate-dependencies</samp> (<samp class="ph codeph">-M</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate a dependency file that can be included in a
                                       <samp class="ph codeph">Makefile</samp> for the
                                       <samp class="ph codeph">.c</samp>, <samp class="ph codeph">.cc</samp>, <samp class="ph codeph">.cpp</samp>,
                                       <samp class="ph codeph">.cxx</samp>, and <samp class="ph codeph">.cu</samp>
                                       input file.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The output is generated in <em class="ph i">stdout</em> by default.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-generate-nonsystem-dependencies"><a name="options-for-specifying-compilation-phase-generate-nonsystem-dependencies" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-generate-nonsystem-dependencies" name="options-for-specifying-compilation-phase-generate-nonsystem-dependencies" shape="rect">4.2.2.13.&nbsp;<samp class="ph codeph">--generate-nonsystem-dependencies</samp> (<samp class="ph codeph">-MM</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Same as <samp class="ph codeph">--generate-dependencies</samp> but skip header
                                       files found in system directories (Linux only).
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default Output File Name</h5>
                                 <p class="p">
                                    The output is generated in <em class="ph i">stdout</em> by default.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-specifying-compilation-phase-run"><a name="options-for-specifying-compilation-phase-run" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-specifying-compilation-phase-run" name="options-for-specifying-compilation-phase-run" shape="rect">4.2.2.14.&nbsp;<samp class="ph codeph">--run</samp> (<samp class="ph codeph">-run</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Compile and link all input files into an executable,
                                       and execute it.
                                       </em></p>
                                 <p class="p">
                                    When the input is a single executable,
                                    it is executed without any compilation or linking.
                                    This step is intended for developers who do not want to be bothered
                                    with setting the necessary environment variables;
                                    these are set temporarily by <samp class="ph codeph">nvcc</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="options-for-altering-compiler-linker-behavior"><a name="options-for-altering-compiler-linker-behavior" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior" name="options-for-altering-compiler-linker-behavior" shape="rect">4.2.3.&nbsp;Options for Specifying Behavior of Compiler/Linker</a></h3>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-profile"><a name="options-for-altering-compiler-linker-behavior-profile" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-profile" name="options-for-altering-compiler-linker-behavior-profile" shape="rect">4.2.3.1.&nbsp;<samp class="ph codeph">--profile</samp> (<samp class="ph codeph">-pg</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Instrument generated code/executable for use by
                                       <samp class="ph codeph">gprof</samp>.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-debug"><a name="options-for-altering-compiler-linker-behavior-debug" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-debug" name="options-for-altering-compiler-linker-behavior-debug" shape="rect">4.2.3.2.&nbsp;<samp class="ph codeph">--debug</samp> (<samp class="ph codeph">-g</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate debug information for host code.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-device-debug"><a name="options-for-altering-compiler-linker-behavior-device-debug" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-device-debug" name="options-for-altering-compiler-linker-behavior-device-debug" shape="rect">4.2.3.3.&nbsp;<samp class="ph codeph">--device-debug</samp> (<samp class="ph codeph">-G</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate debug information for device code.
                                       </em></p>
                                 <p class="p">
                                    This option turns off all optimizations on device code.
                                    It is not intended for profiling; use
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--generate-line-info</a></samp>
                                    instead for profiling.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-extensible-whole-program"><a name="options-for-altering-compiler-linker-behavior-extensible-whole-program" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-extensible-whole-program" name="options-for-altering-compiler-linker-behavior-extensible-whole-program" shape="rect">4.2.3.4.&nbsp;<samp class="ph codeph">--extensible-whole-program</samp> (<samp class="ph codeph">-ewp</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate extensible whole program device code, which allows some
                                       calls to not be resolved until linking with libcudadevrt.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-generate-line-info"><a name="options-for-altering-compiler-linker-behavior-generate-line-info" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-generate-line-info" name="options-for-altering-compiler-linker-behavior-generate-line-info" shape="rect">4.2.3.5.&nbsp;<samp class="ph codeph">--generate-line-info</samp> (<samp class="ph codeph">-lineinfo</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate line-number information for device code.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-optimize"><a name="options-for-altering-compiler-linker-behavior-optimize" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-optimize" name="options-for-altering-compiler-linker-behavior-optimize" shape="rect">4.2.3.6.&nbsp;<samp class="ph codeph">--optimize <em class="ph i">level</em></samp> (<samp class="ph codeph">-O</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify optimization level for host code.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-ftemplate-backtrace-limit"><a name="options-for-altering-compiler-linker-behavior-ftemplate-backtrace-limit" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-ftemplate-backtrace-limit" name="options-for-altering-compiler-linker-behavior-ftemplate-backtrace-limit" shape="rect">4.2.3.7.&nbsp;<samp class="ph codeph">--ftemplate-backtrace-limit <em class="ph i">limit</em></samp> (<samp class="ph codeph">-ftemplate-backtrace-limit</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Set the maximum number of template instantiation notes for a
                                       single warning or error to <em class="ph i">limit</em>.
                                       </em></p>
                                 <p class="p">
                                    A value of <samp class="ph codeph">0</samp> is allowed,
                                    and indicates that no limit should be enforced.
                                    This value is also passed to the host compiler if it provides
                                    an equivalent flag.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-ftemplate-depth"><a name="options-for-altering-compiler-linker-behavior-ftemplate-depth" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-ftemplate-depth" name="options-for-altering-compiler-linker-behavior-ftemplate-depth" shape="rect">4.2.3.8.&nbsp;<samp class="ph codeph">--ftemplate-depth <em class="ph i">limit</em></samp> (<samp class="ph codeph">-ftemplate-depth</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Set the maximum instantiation depth for template classes to 
                                       <em class="ph i">limit</em>.
                                       </em></p>
                                 <p class="p">
                                    This value is also passed to the host compiler if 
                                    it provides an equivalent flag.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-shared"><a name="options-for-altering-compiler-linker-behavior-shared" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-shared" name="options-for-altering-compiler-linker-behavior-shared" shape="rect">4.2.3.9.&nbsp;<samp class="ph codeph">--shared</samp> (<samp class="ph codeph">-shared</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate a shared library during linking.
                                       </em></p>
                                 <p class="p">
                                    Use option
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-passing-specific-phase-options-linker-options" shape="rect">--linker-options</a></samp>
                                    when other linker options are required for more control.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-x"><a name="options-for-altering-compiler-linker-behavior-x" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-x" name="options-for-altering-compiler-linker-behavior-x" shape="rect">4.2.3.10.&nbsp;<samp class="ph codeph">--x</samp> {<samp class="ph codeph">c</samp>|<samp class="ph codeph">c++</samp>|<samp class="ph codeph">cu</samp>} (<samp class="ph codeph">-x</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Explicitly specify the language for the input files, rather
                                       than letting the compiler choose a default based on the file
                                       name suffix.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">c</samp></li>
                                    <li class="li"><samp class="ph codeph">c++</samp></li>
                                    <li class="li"><samp class="ph codeph">cu</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    The language of the source code is determined based
                                    on the file name suffix.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-std"><a name="options-for-altering-compiler-linker-behavior-std" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-std" name="options-for-altering-compiler-linker-behavior-std" shape="rect">4.2.3.11.&nbsp;<samp class="ph codeph">--std</samp> {<samp class="ph codeph">c++03</samp>|<samp class="ph codeph">c++11</samp>|<samp class="ph codeph">c++14</samp>} (<samp class="ph codeph">-std</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Select a particular C++ dialect.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">c++03</samp></li>
                                    <li class="li"><samp class="ph codeph">c++11</samp></li>
                                    <li class="li"><samp class="ph codeph">c++14</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    The default C++ dialect depends on the host compiler.
                                    <samp class="ph codeph">nvcc</samp> matches the default C++ dialect that the host
                                    compiler uses.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-no-host-device-initializer-list"><a name="options-for-altering-compiler-linker-behavior-no-host-device-initializer-list" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-no-host-device-initializer-list" name="options-for-altering-compiler-linker-behavior-no-host-device-initializer-list" shape="rect">4.2.3.12.&nbsp;<samp class="ph codeph">--no-host-device-initializer-list</samp> (<samp class="ph codeph">-nohdinitlist</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Do not consider member functions of
                                       <samp class="ph codeph">std::initializer_list</samp> as
                                       <samp class="ph codeph">__host__</samp> <samp class="ph codeph">__device__</samp>
                                       functions implicitly.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-expt-relaxed-constexpr"><a name="options-for-altering-compiler-linker-behavior-expt-relaxed-constexpr" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-expt-relaxed-constexpr" name="options-for-altering-compiler-linker-behavior-expt-relaxed-constexpr" shape="rect">4.2.3.13.&nbsp;<samp class="ph codeph">--expt-relaxed-constexpr</samp> (<samp class="ph codeph">-expt-relaxed-constexpr</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i"><strong class="ph b">Experimental flag</strong>:
                                       Allow host code to invoke
                                       <samp class="ph codeph">__device__</samp> <samp class="ph codeph">constexpr</samp>
                                       functions, and device code to invoke
                                       <samp class="ph codeph">__host__</samp> <samp class="ph codeph">constexpr</samp>
                                       functions.
                                       </em></p>
                                 <p class="p">
                                    Note that the behavior of this flag may change in future compiler
                                    releases.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-expt-extended-lambda"><a name="options-for-altering-compiler-linker-behavior-expt-extended-lambda" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-expt-extended-lambda" name="options-for-altering-compiler-linker-behavior-expt-extended-lambda" shape="rect">4.2.3.14.&nbsp;<samp class="ph codeph">--expt-extended-lambda</samp> (<samp class="ph codeph">-expt-extended-lambda</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i"><strong class="ph b">Experimental flag</strong>:
                                       Allow <samp class="ph codeph">__host__</samp>, <samp class="ph codeph">__device__</samp>
                                       annotations in lambda declarations.
                                       </em></p>
                                 <p class="p">
                                    Note that the behavior of this flag may change in future compiler
                                    releases.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-altering-compiler-linker-behavior-machine"><a name="options-for-altering-compiler-linker-behavior-machine" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-altering-compiler-linker-behavior-machine" name="options-for-altering-compiler-linker-behavior-machine" shape="rect">4.2.3.15.&nbsp;<samp class="ph codeph">--machine</samp> {<samp class="ph codeph">32</samp>|<samp class="ph codeph">64</samp>} (<samp class="ph codeph">-m</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify 32-bit vs. 64-bit architecture.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">32</samp></li>
                                    <li class="li"><samp class="ph codeph">64</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    This option is set based on the host platform on which
                                    <samp class="ph codeph">nvcc</samp> is executed.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="options-for-passing-specific-phase-options"><a name="options-for-passing-specific-phase-options" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#options-for-passing-specific-phase-options" name="options-for-passing-specific-phase-options" shape="rect">4.2.4.&nbsp;Options for Passing Specific Phase Options</a></h3>
                        <div class="body refbody">
                           <div class="section">
                              <p class="p">
                                 These options allow passing specific options directly to the internal
                                 compilation tools that <samp class="ph codeph">nvcc</samp> encapsulates, without
                                 burdening <samp class="ph codeph">nvcc</samp> with overly detailed knowledge of these
                                 tools.
                                 A table of useful sub-tool options can be found at the end of this
                                 chapter.
                                 
                              </p>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-passing-specific-phase-options-compiler-options"><a name="options-for-passing-specific-phase-options-compiler-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-passing-specific-phase-options-compiler-options" name="options-for-passing-specific-phase-options-compiler-options" shape="rect">4.2.4.1.&nbsp;<samp class="ph codeph">--compiler-options <em class="ph i">options,...</em></samp> (<samp class="ph codeph">-Xcompiler</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify options directly to the compiler/preprocessor.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-passing-specific-phase-options-linker-options"><a name="options-for-passing-specific-phase-options-linker-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-passing-specific-phase-options-linker-options" name="options-for-passing-specific-phase-options-linker-options" shape="rect">4.2.4.2.&nbsp;<samp class="ph codeph">--linker-options <em class="ph i">options,...</em></samp> (<samp class="ph codeph">-Xlinker</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify options directly to the host linker.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-passing-specific-phase-options-archive-options"><a name="options-for-passing-specific-phase-options-archive-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-passing-specific-phase-options-archive-options" name="options-for-passing-specific-phase-options-archive-options" shape="rect">4.2.4.3.&nbsp;<samp class="ph codeph">--archive-options <em class="ph i">options,...</em></samp> (<samp class="ph codeph">-Xarchive</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify options directly to the library manager.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-passing-specific-phase-options-ptxas-options"><a name="options-for-passing-specific-phase-options-ptxas-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-passing-specific-phase-options-ptxas-options" name="options-for-passing-specific-phase-options-ptxas-options" shape="rect">4.2.4.4.&nbsp;<samp class="ph codeph">--ptxas-options <em class="ph i">options,...</em></samp> (<samp class="ph codeph">-Xptxas</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify options directly to <samp class="ph codeph">ptxas</samp>,
                                       the PTX optimizing assembler.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-passing-specific-phase-options-nvlink-options"><a name="options-for-passing-specific-phase-options-nvlink-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-passing-specific-phase-options-nvlink-options" name="options-for-passing-specific-phase-options-nvlink-options" shape="rect">4.2.4.5.&nbsp;<samp class="ph codeph">--nvlink-options <em class="ph i">options,...</em></samp> (<samp class="ph codeph">-Xnvlink</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify options directly to <samp class="ph codeph">nvlink</samp>,
                                       the device linker.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="options-for-guiding-compiler-driver"><a name="options-for-guiding-compiler-driver" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver" name="options-for-guiding-compiler-driver" shape="rect">4.2.5.&nbsp;Options for Guiding the Compiler Driver</a></h3>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-dont-use-profile"><a name="options-for-guiding-compiler-driver-dont-use-profile" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-dont-use-profile" name="options-for-guiding-compiler-driver-dont-use-profile" shape="rect">4.2.5.1.&nbsp;<samp class="ph codeph">--dont-use-profile</samp> (<samp class="ph codeph">-noprof</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Do not use configurations from the <samp class="ph codeph">nvcc.profile</samp>
                                       file for compilation.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-dryrun"><a name="options-for-guiding-compiler-driver-dryrun" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-dryrun" name="options-for-guiding-compiler-driver-dryrun" shape="rect">4.2.5.2.&nbsp;<samp class="ph codeph">--dryrun</samp> (<samp class="ph codeph">-dryrun</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       List the compilation sub-commands without executing them.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-verbose"><a name="options-for-guiding-compiler-driver-verbose" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-verbose" name="options-for-guiding-compiler-driver-verbose" shape="rect">4.2.5.3.&nbsp;<samp class="ph codeph">--verbose</samp> (<samp class="ph codeph">-v</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       List the compilation sub-commands while executing them.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-keep"><a name="options-for-guiding-compiler-driver-keep" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-keep" name="options-for-guiding-compiler-driver-keep" shape="rect">4.2.5.4.&nbsp;<samp class="ph codeph">--keep</samp> (<samp class="ph codeph">-keep</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Keep all intermediate files that are generated during internal
                                       compilation steps.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-keep-dir"><a name="options-for-guiding-compiler-driver-keep-dir" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-keep-dir" name="options-for-guiding-compiler-driver-keep-dir" shape="rect">4.2.5.5.&nbsp;<samp class="ph codeph">--keep-dir <em class="ph i">directory</em></samp> (<samp class="ph codeph">-keep-dir</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Keep all intermediate files that are generated during internal
                                       compilation steps in this directory.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-save-temps"><a name="options-for-guiding-compiler-driver-save-temps" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-save-temps" name="options-for-guiding-compiler-driver-save-temps" shape="rect">4.2.5.6.&nbsp;<samp class="ph codeph">--save-temps</samp> (<samp class="ph codeph">-save-temps</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       This option is an alias of
                                       <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver-keep" shape="rect">--keep</a></samp>.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-clean-targets"><a name="options-for-guiding-compiler-driver-clean-targets" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-clean-targets" name="options-for-guiding-compiler-driver-clean-targets" shape="rect">4.2.5.7.&nbsp;<samp class="ph codeph">--clean-targets</samp> (<samp class="ph codeph">-clean</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Delete all the non-temporary files that the same
                                       <samp class="ph codeph">nvcc</samp> command would generate without this option.
                                       </em></p>
                                 <p class="p">
                                    This option reverses the behavior of <samp class="ph codeph">nvcc</samp>.
                                    When specified, none of the compilation phases will be executed.
                                    Instead, all of the non-temporary files that
                                    <samp class="ph codeph">nvcc</samp> would otherwise create will be deleted.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-run-args"><a name="options-for-guiding-compiler-driver-run-args" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-run-args" name="options-for-guiding-compiler-driver-run-args" shape="rect">4.2.5.8.&nbsp;<samp class="ph codeph">--run-args <em class="ph i">arguments,...</em></samp> (<samp class="ph codeph">-run-args</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify command line arguments for the executable
                                       when used in conjunction with 
                                       <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--run</a></samp>.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-input-drive-prefix"><a name="options-for-guiding-compiler-driver-input-drive-prefix" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-input-drive-prefix" name="options-for-guiding-compiler-driver-input-drive-prefix" shape="rect">4.2.5.9.&nbsp;<samp class="ph codeph">--input-drive-prefix <em class="ph i">prefix</em></samp> (<samp class="ph codeph">-idp</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the input drive prefix.
                                       </em></p>
                                 <p class="p">
                                    On Windows, all command line arguments that refer to file names
                                    must be converted to the Windows native format before they are 
                                    passed to pure Windows executables.
                                    This option specifies how the current development environment
                                    represents absolute paths.
                                    Use <samp class="ph codeph">/cygwin/</samp> as <samp class="ph codeph"><em class="ph i">prefix</em></samp>
                                    for Cygwin build environments and <samp class="ph codeph">/</samp> as
                                    <samp class="ph codeph"><em class="ph i">prefix</em></samp> for MinGW.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-dependency-drive-prefix"><a name="options-for-guiding-compiler-driver-dependency-drive-prefix" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-dependency-drive-prefix" name="options-for-guiding-compiler-driver-dependency-drive-prefix" shape="rect">4.2.5.10.&nbsp;<samp class="ph codeph">--dependency-drive-prefix <em class="ph i">prefix</em></samp> (<samp class="ph codeph">-ddp</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the dependency drive prefix.
                                       </em></p>
                                 <p class="p">
                                    On Windows, when generating dependency files (see
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-dependencies</a></samp>),
                                    all file names must be converted appropriately for the instance
                                    of <samp class="ph codeph">make</samp> that is used.
                                    Some instances of <samp class="ph codeph">make</samp> have trouble with the
                                    colon in absolute paths in the native Windows format, which
                                    depends on the environment in which the <samp class="ph codeph">make</samp>
                                    instance has been compiled.
                                    Use <samp class="ph codeph">/cygwin/</samp> as <samp class="ph codeph"><em class="ph i">prefix</em></samp>
                                    for a Cygwin <samp class="ph codeph">make</samp>, and <samp class="ph codeph">/</samp> as
                                    <samp class="ph codeph"><em class="ph i">prefix</em></samp> for MinGW.
                                    Alternatively, leave these file names in the native Windows
                                    format by specifying nothing.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-drive-prefix"><a name="options-for-guiding-compiler-driver-drive-prefix" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-drive-prefix" name="options-for-guiding-compiler-driver-drive-prefix" shape="rect">4.2.5.11.&nbsp;<samp class="ph codeph">--drive-prefix <em class="ph i">prefix</em></samp> (<samp class="ph codeph">-dp</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the drive prefix.
                                       </em></p>
                                 <p class="p">
                                    This option specifies <samp class="ph codeph"><em class="ph i">prefix</em></samp> as both
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver-input-drive-prefix" shape="rect">--input-drive-prefix</a></samp>
                                    and
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver-dependency-drive-prefix" shape="rect">--dependency-drive-prefix</a></samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-dependency-target-name"><a name="options-for-guiding-compiler-driver-dependency-target-name" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-dependency-target-name" name="options-for-guiding-compiler-driver-dependency-target-name" shape="rect">4.2.5.12.&nbsp;<samp class="ph codeph">--dependency-target-name <em class="ph i">target</em></samp> (<samp class="ph codeph">-MT</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the target name of the generated rule when generating a
                                       dependency file (see
                                       <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--generate-dependencies</a></samp>).
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-no-align-double"><a name="options-for-guiding-compiler-driver-no-align-double" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-no-align-double" name="options-for-guiding-compiler-driver-no-align-double" shape="rect">4.2.5.13.&nbsp;<samp class="ph codeph">--no-align-double</samp></a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify that <samp class="ph codeph">-malign-double</samp> should not be
                                       passed as a compiler argument on 32-bit platforms.
                                       </em></p>
                                 <p class="p"><strong class="ph b">WARNING:</strong> this makes the ABI incompatible with CUDA's
                                    kernel ABI for certain 64-bit types.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-guiding-compiler-driver-no-device-link"><a name="options-for-guiding-compiler-driver-no-device-link" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-guiding-compiler-driver-no-device-link" name="options-for-guiding-compiler-driver-no-device-link" shape="rect">4.2.5.14.&nbsp;<samp class="ph codeph">--no-device-link</samp> (<samp class="ph codeph">-nodlink</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Skip the device link step when linking object files.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="options-for-steering-cuda-compilation"><a name="options-for-steering-cuda-compilation" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-cuda-compilation" name="options-for-steering-cuda-compilation" shape="rect">4.2.6.&nbsp;Options for Steering CUDA Compilation</a></h3>
                        <div class="topic reference nested3" id="options-for-steering-cuda-compilation-default-stream"><a name="options-for-steering-cuda-compilation-default-stream" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-cuda-compilation-default-stream" name="options-for-steering-cuda-compilation-default-stream" shape="rect">4.2.6.1.&nbsp;<samp class="ph codeph">--default-stream</samp> {<samp class="ph codeph">legacy</samp>|<samp class="ph codeph">null</samp>|<samp class="ph codeph">per-thread</samp>} (<samp class="ph codeph">-default-stream</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the stream that CUDA commands from the compiled
                                       program will be sent to by default.
                                       </em></p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <dl class="dl">
                                    <dt class="dt dlterm"><samp class="ph codeph">legacy</samp></dt>
                                    <dd class="dd">
                                       The CUDA legacy stream (per context, implicitly
                                       synchronizes with other streams)
                                       
                                    </dd>
                                    <dt class="dt dlterm"><samp class="ph codeph">per-thread</samp></dt>
                                    <dd class="dd">
                                       Normal CUDA stream (per thread, does not implicitly
                                       synchronize with other streams)
                                       
                                    </dd>
                                    <dt class="dt dlterm"><samp class="ph codeph">null</samp></dt>
                                    <dd class="dd">
                                       Deprecated alias for <samp class="ph codeph">legacy</samp></dd>
                                 </dl>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p"><samp class="ph codeph">legacy</samp> is used as the default stream.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="options-for-steering-gpu-code-generation"><a name="options-for-steering-gpu-code-generation" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation" name="options-for-steering-gpu-code-generation" shape="rect">4.2.7.&nbsp;Options for Steering GPU Code Generation</a></h3>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-gpu-architecture"><a name="options-for-steering-gpu-code-generation-gpu-architecture" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-gpu-architecture" name="options-for-steering-gpu-code-generation-gpu-architecture" shape="rect">4.2.7.1.&nbsp;<samp class="ph codeph">--gpu-architecture <em class="ph i">arch</em></samp> (<samp class="ph codeph">-arch</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the name of the class of NVIDIA <em class="ph i">virtual</em> GPU
                                       architecture for which the CUDA input files must be compiled.
                                       </em></p>
                                 <p class="p">
                                    With the exception as described for the shorthand below, the
                                    architecture specified with this option must be a
                                    <em class="ph i">virtual</em> architecture (such as compute_50).
                                    Normally, this option alone does not trigger assembly of the
                                    generated PTX for a <em class="ph i">real</em> architecture (that is the role
                                    of <samp class="ph codeph">nvcc</samp> option
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>,
                                    see below); rather, its purpose is to control preprocessing
                                    and compilation of the input to PTX.
                                    
                                 </p>
                                 <p class="p">
                                    For convenience, in case of simple <samp class="ph codeph">nvcc</samp>
                                    compilations, the following shorthand is supported.
                                    If no value for option
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                                    is specified, then the value of this option defaults to the
                                    value of <samp class="ph codeph">--gpu-architecture</samp>.
                                    In this situation, as the only exception to the description above,
                                    the value specified for <samp class="ph codeph">--gpu-architecture</samp>
                                    may be a <em class="ph i">real</em> architecture (such as sm_50), in which
                                    case <samp class="ph codeph">nvcc</samp> uses the specified <em class="ph i">real</em>
                                    architecture and its closest <em class="ph i">virtual</em> architecture as
                                    effective architecture values.
                                    For example, <samp class="ph codeph">nvcc --gpu-architecture=sm_50</samp> is
                                    equivalent to
                                    <samp class="ph codeph">
                                       nvcc --gpu-architecture=compute_50
                                       <a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a>=sm_50,compute_50</samp>.
                                    
                                 </p>
                                 <p class="p">
                                    See <a class="xref" href="index.html#virtual-architecture-feature-list" shape="rect">Virtual Architecture Feature List</a> for the
                                    list of supported <em class="ph i">virtual</em> architectures and
                                    <a class="xref" href="index.html#gpu-feature-list" shape="rect">GPU Feature List</a> for the list of supported
                                    <em class="ph i">real</em> architectures.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p"><samp class="ph codeph">sm_30</samp> is used as the default value;
                                    PTX is generated for <samp class="ph codeph">compute_30</samp> then
                                    assembled and optimized for <samp class="ph codeph">sm_30</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-gpu-code"><a name="options-for-steering-gpu-code-generation-gpu-code" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-gpu-code" name="options-for-steering-gpu-code-generation-gpu-code" shape="rect">4.2.7.2.&nbsp;<samp class="ph codeph">--gpu-code <em class="ph i">code,...</em></samp> (<samp class="ph codeph">-code</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the name of the NVIDIA GPU to assemble and optimize
                                       PTX for.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">nvcc</samp> embeds a compiled code image in the
                                    resulting executable for each specified <em class="ph i">code</em>
                                    architecture, which is a true binary load image for each
                                    <em class="ph i">real</em> architecture (such as sm_50), and PTX code for the
                                    <em class="ph i">virtual</em> architecture (such as compute_50).
                                    
                                 </p>
                                 <p class="p">
                                    During runtime, such embedded PTX code is dynamically compiled
                                    by the CUDA runtime system if no binary load image is found
                                    for the <em class="ph i">current</em> GPU.
                                    
                                 </p>
                                 <p class="p">
                                    Architectures specified for options
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp> and
                                    <samp class="ph codeph">--gpu-code</samp> may be <em class="ph i">virtual</em> as well as
                                    <em class="ph i">real</em>, but the <samp class="ph codeph"><em class="ph i">code</em></samp>
                                    architectures must be compatible with the
                                    <samp class="ph codeph"><em class="ph i">arch</em></samp> architecture.
                                    When the <samp class="ph codeph">--gpu-code</samp> option is used, the value
                                    for the
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                                    option must be a <em class="ph i">virtual</em> PTX architecture.
                                    
                                 </p>
                                 <p class="p">
                                    For instance,
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a>=compute_50</samp>
                                    is not compatible with <samp class="ph codeph">--gpu-code=sm_30</samp>,
                                    because the earlier compilation stages will assume the
                                    availability of <samp class="ph codeph">compute_50</samp> features that are
                                    not present on <samp class="ph codeph">sm_30</samp>.
                                    
                                 </p>
                                 <p class="p">
                                    See <a class="xref" href="index.html#virtual-architecture-feature-list" shape="rect">Virtual Architecture Feature List</a> for the
                                    list of supported <em class="ph i">virtual</em> architectures and
                                    <a class="xref" href="index.html#gpu-feature-list" shape="rect">GPU Feature List</a> for the list of supported
                                    <em class="ph i">real</em> architectures.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-generate-code"><a name="options-for-steering-gpu-code-generation-generate-code" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-generate-code" name="options-for-steering-gpu-code-generation-generate-code" shape="rect">4.2.7.3.&nbsp;<samp class="ph codeph">--generate-code <em class="ph i">specification</em></samp> (<samp class="ph codeph">-gencode</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       This option provides a generalization of the
                                       <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a>=<em class="ph i">arch </em><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a>=<em class="ph i">code,...</em></samp>
                                       option combination for specifying <samp class="ph codeph">nvcc</samp>
                                       behavior with respect to code generation.
                                       </em></p>
                                 <p class="p">
                                    Where use of the previous options generates code for different
                                    <em class="ph i">real</em> architectures with the PTX for the same
                                    <em class="ph i">virtual</em> architecture, option
                                    <samp class="ph codeph">--generate-code</samp> allows multiple PTX
                                    generations for different <em class="ph i">virtual</em> architectures.
                                    In fact,
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a>=<em class="ph i">arch </em><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a>=<em class="ph i">code,...</em></samp>
                                    is equivalent to
                                    <samp class="ph codeph">
                                       --generate-code=arch=<em class="ph i">arch</em>,code=<em class="ph i">code,...</em></samp>.
                                    
                                 </p>
                                 <p class="p"><samp class="ph codeph">--generate-code</samp> options may be repeated for
                                    different virtual architectures.
                                    
                                 </p>
                                 <p class="p">
                                    See <a class="xref" href="index.html#virtual-architecture-feature-list" shape="rect">Virtual Architecture Feature List</a> for the
                                    list of supported <em class="ph i">virtual</em> architectures and
                                    <a class="xref" href="index.html#gpu-feature-list" shape="rect">GPU Feature List</a> for the list of supported
                                    <em class="ph i">real</em> architectures.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-relocatable-device-code"><a name="options-for-steering-gpu-code-generation-relocatable-device-code" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-relocatable-device-code" name="options-for-steering-gpu-code-generation-relocatable-device-code" shape="rect">4.2.7.4.&nbsp;<samp class="ph codeph">--relocatable-device-code</samp> {<samp class="ph codeph">true</samp>|<samp class="ph codeph">false</samp>} (<samp class="ph codeph">-rdc</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Enable or disable the generation of relocatable device code.
                                       </em></p>
                                 <p class="p">
                                    If disabled, executable device code is generated.
                                    Relocatable device code must be linked before it can be executed.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">true</samp></li>
                                    <li class="li"><samp class="ph codeph">false</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    The generation of relocatable device code is disabled.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-entries"><a name="options-for-steering-gpu-code-generation-entries" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-entries" name="options-for-steering-gpu-code-generation-entries" shape="rect">4.2.7.5.&nbsp;<samp class="ph codeph">--entries <em class="ph i">entry,...</em></samp> (<samp class="ph codeph">-e</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the global entry functions for which code must be
                                       generated.
                                       </em></p>
                                 <p class="p">
                                    PTX is generated for all entry functions, but only the selected entry
                                    functions are assembled.
                                    Entry function names for this option must be specified in their mangled
                                    form.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p"><samp class="ph codeph">nvcc</samp> generates code for all entry functions.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-maxrregcount"><a name="options-for-steering-gpu-code-generation-maxrregcount" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-maxrregcount" name="options-for-steering-gpu-code-generation-maxrregcount" shape="rect">4.2.7.6.&nbsp;<samp class="ph codeph">--maxrregcount <em class="ph i">amount</em></samp> (<samp class="ph codeph">-maxrregcount</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Specify the maximum number of registers that GPU functions can
                                       use.
                                       </em></p>
                                 <p class="p">
                                    Up to a function-specific limit, a higher value will generally
                                    increase the performance of individual GPU threads that
                                    execute this function.
                                    However, because thread registers are allocated from a global
                                    register pool on each GPU, a higher value of this option will
                                    also reduce the maximum thread block size, thereby reducing
                                    the amount of thread parallelism.
                                    Hence, a good <samp class="ph codeph">maxrregcount</samp> value is the
                                    result of a trade-off.
                                    
                                 </p>
                                 <p class="p">
                                    A value less than the minimum number of registers required by the
                                    ABI will be bumped up by the compiler to the ABI minimum limit.
                                    
                                 </p>
                                 <p class="p">
                                    A user program may not be able to make use of all registers, as
                                    some registers are reserved by the compiler.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    No maximum is assumed.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-use_fast_math"><a name="options-for-steering-gpu-code-generation-use_fast_math" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-use_fast_math" name="options-for-steering-gpu-code-generation-use_fast_math" shape="rect">4.2.7.7.&nbsp;<samp class="ph codeph">--use_fast_math</samp> (<samp class="ph codeph">-use_fast_math</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Make use of the fast math library.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">--use_fast_math</samp> implies
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--ftz</a>=true
                                       <a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--prec-div</a>=false
                                       <a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--prec-sqrt</a>=false
                                       <a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--fmad</a>=true</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-ftz"><a name="options-for-steering-gpu-code-generation-ftz" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-ftz" name="options-for-steering-gpu-code-generation-ftz" shape="rect">4.2.7.8.&nbsp;<samp class="ph codeph">--ftz</samp> {<samp class="ph codeph">true</samp>|<samp class="ph codeph">false</samp>} (<samp class="ph codeph">-ftz</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Control single-precision denormals support.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">--ftz=true</samp> flushes denormal values to zero
                                    and <samp class="ph codeph">--ftz=false</samp> preserves denormal values.
                                    
                                 </p>
                                 <p class="p"><samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--use_fast_math</a></samp>
                                    implies <samp class="ph codeph">--ftz=true</samp>.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">true</samp></li>
                                    <li class="li"><samp class="ph codeph">false</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    This option is set to <samp class="ph codeph">false</samp> and
                                    <samp class="ph codeph">nvcc</samp> preserves denormal values.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-prec-div"><a name="options-for-steering-gpu-code-generation-prec-div" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-prec-div" name="options-for-steering-gpu-code-generation-prec-div" shape="rect">4.2.7.9.&nbsp;<samp class="ph codeph">--prec-div</samp> {<samp class="ph codeph">true</samp>|<samp class="ph codeph">false</samp>} (<samp class="ph codeph">-prec-div</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       This option controls single-precision floating-point division
                                       and reciprocals.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">--prec-div=true</samp> enables the IEEE
                                    round-to-nearest mode and <samp class="ph codeph">--prec-div=false</samp>
                                    enables the fast approximation mode.
                                    
                                 </p>
                                 <p class="p"><samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--use_fast_math</a></samp>
                                    implies <samp class="ph codeph">--prec-div=false</samp>.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">true</samp></li>
                                    <li class="li"><samp class="ph codeph">false</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    This option is set to <samp class="ph codeph">true</samp> and
                                    <samp class="ph codeph">nvcc</samp> enables the IEEE round-to-nearest mode.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-prec-sqrt"><a name="options-for-steering-gpu-code-generation-prec-sqrt" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-prec-sqrt" name="options-for-steering-gpu-code-generation-prec-sqrt" shape="rect">4.2.7.10.&nbsp;<samp class="ph codeph">--prec-sqrt</samp> {<samp class="ph codeph">true</samp>|<samp class="ph codeph">false</samp>} (<samp class="ph codeph">-prec-sqrt</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       This option controls single-precision floating-point square
                                       root.
                                       </em></p>
                                 <p class="p"><samp class="ph codeph">--prec-sqrt=true</samp> enables the IEEE
                                    round-to-nearest mode and <samp class="ph codeph">--prec-sqrt=false</samp>
                                    enables the fast approximation mode.
                                    
                                 </p>
                                 <p class="p"><samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--use_fast_math</a></samp>
                                    implies <samp class="ph codeph">--prec-sqrt=false</samp>.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">true</samp></li>
                                    <li class="li"><samp class="ph codeph">false</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    This option is set to <samp class="ph codeph">true</samp> and
                                    <samp class="ph codeph">nvcc</samp> enables the IEEE round-to-nearest mode.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="options-for-steering-gpu-code-generation-fmad"><a name="options-for-steering-gpu-code-generation-fmad" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#options-for-steering-gpu-code-generation-fmad" name="options-for-steering-gpu-code-generation-fmad" shape="rect">4.2.7.11.&nbsp;<samp class="ph codeph">--fmad</samp> {<samp class="ph codeph">true</samp>|<samp class="ph codeph">false</samp>} (<samp class="ph codeph">-fmad</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       This option enables (disables) the contraction of
                                       floating-point multiplies and adds/subtracts into
                                       floating-point multiply-add operations (FMAD, FFMA, or DFMA).
                                       </em></p>
                                 <p class="p"><samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--use_fast_math</a></samp>
                                    implies <samp class="ph codeph">--fmad=true</samp>.
                                    
                                 </p>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Allowed Values</h5>
                                 <ul class="ul">
                                    <li class="li"><samp class="ph codeph">true</samp></li>
                                    <li class="li"><samp class="ph codeph">false</samp></li>
                                 </ul>
                              </div>
                              <div class="section">
                                 <h5 class="title sectiontitle">Default</h5>
                                 <p class="p">
                                    This option is set to <samp class="ph codeph">true</samp> and
                                    <samp class="ph codeph">nvcc</samp> enables the contraction of
                                    floating-point multiplies and adds/subtracts into
                                    floating-point multiply-add operations (FMAD, FFMA, or DFMA).
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic reference nested2" id="generic-tool-options"><a name="generic-tool-options" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options" name="generic-tool-options" shape="rect">4.2.8.&nbsp;Generic Tool Options</a></h3>
                        <div class="topic reference nested3" id="generic-tool-options-disable-warnings"><a name="generic-tool-options-disable-warnings" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-disable-warnings" name="generic-tool-options-disable-warnings" shape="rect">4.2.8.1.&nbsp;<samp class="ph codeph">--disable-warnings</samp> (<samp class="ph codeph">-w</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Inhibit all warning messages.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-source-in-ptx"><a name="generic-tool-options-source-in-ptx" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-source-in-ptx" name="generic-tool-options-source-in-ptx" shape="rect">4.2.8.2.&nbsp;<samp class="ph codeph">--source-in-ptx</samp> (<samp class="ph codeph">-src-in-ptx</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Interleave source in PTX.
                                       </em></p>
                                 <p class="p">May only be used in conjunction with
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--device-debug</a></samp>
                                    or
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--generate-line-info</a></samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-restrict"><a name="generic-tool-options-restrict" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-restrict" name="generic-tool-options-restrict" shape="rect">4.2.8.3.&nbsp;<samp class="ph codeph">--restrict</samp> (<samp class="ph codeph">-restrict</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Assert that all kernel pointer parameters are
                                       restrict pointers.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-Wno-deprecated-gpu-targets"><a name="generic-tool-options-Wno-deprecated-gpu-targets" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-Wno-deprecated-gpu-targets" name="generic-tool-options-Wno-deprecated-gpu-targets" shape="rect">4.2.8.4.&nbsp;<samp class="ph codeph">--Wno-deprecated-gpu-targets</samp> (<samp class="ph codeph">-Wno-deprecated-gpu-targets</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Suppress warnings about deprecated GPU target architectures.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-Wno-deprecated-declarations"><a name="generic-tool-options-Wno-deprecated-declarations" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-Wno-deprecated-declarations" name="generic-tool-options-Wno-deprecated-declarations" shape="rect">4.2.8.5.&nbsp;<samp class="ph codeph">--Wno-deprecated-declarations</samp> (<samp class="ph codeph">-Wno-deprecated-declarations</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Suppress warning on use of a deprecated entity.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-Wreorder"><a name="generic-tool-options-Wreorder" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-Wreorder" name="generic-tool-options-Wreorder" shape="rect">4.2.8.6.&nbsp;<samp class="ph codeph">--Wreorder</samp> (<samp class="ph codeph">-Wreorder</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Generate warnings when member initializers are reordered.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-Werror"><a name="generic-tool-options-Werror" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-Werror" name="generic-tool-options-Werror" shape="rect">4.2.8.7.&nbsp;<samp class="ph codeph">--Werror <em class="ph i">kind,...</em></samp> (<samp class="ph codeph">-Werror</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Make warnings of the specified kinds into errors.
                                       </em></p>
                                 <div class="p">
                                    The following is the list of warning kinds accepted by this
                                    option:
                                    
                                    <dl class="dl">
                                       <dt class="dt dlterm"><samp class="ph codeph">cross-execution-space-call</samp></dt>
                                       <dd class="dd">
                                          Be more strict about unsupported cross execution space
                                          calls.
                                          The compiler will generate an error instead of a
                                          warning for a call from a <samp class="ph codeph">__host__</samp> <samp class="ph codeph">__device__</samp> function to a
                                          <samp class="ph codeph">__host__</samp> function.
                                          
                                       </dd>
                                       <dt class="dt dlterm"><samp class="ph codeph">reorder</samp></dt>
                                       <dd class="dd">
                                          Generate errors when member initializers are reordered.
                                          
                                       </dd>
                                       <dt class="dt dlterm"><samp class="ph codeph">deprecated-declarations</samp></dt>
                                       <dd class="dd">
                                          Generate error on use of a deprecated entity.
                                          
                                       </dd>
                                    </dl>
                                 </div>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-resource-usage"><a name="generic-tool-options-resource-usage" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-resource-usage" name="generic-tool-options-resource-usage" shape="rect">4.2.8.8.&nbsp;<samp class="ph codeph">--resource-usage</samp> (<samp class="ph codeph">-res-usage</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Show resource usage such as registers and memory of the GPU
                                       code.
                                       </em></p>
                                 <p class="p">
                                    This option implies
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-passing-specific-phase-options" shape="rect">--nvlink-options</a>=<a class="xref" href="index.html#nvlink-options" shape="rect">--verbose</a></samp>
                                    when
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--relocatable-device-code</a>=true</samp>
                                    is set.
                                    Otherwise, it implies
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-passing-specific-phase-options" shape="rect">--ptxas-options</a>=<a class="xref" href="index.html#ptxas-options" shape="rect">--verbose</a></samp>.
                                    
                                 </p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-help"><a name="generic-tool-options-help" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-help" name="generic-tool-options-help" shape="rect">4.2.8.9.&nbsp;<samp class="ph codeph">--help</samp> (<samp class="ph codeph">-h</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Print help information on this tool.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-version"><a name="generic-tool-options-version" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-version" name="generic-tool-options-version" shape="rect">4.2.8.10.&nbsp;<samp class="ph codeph">--version</samp> (<samp class="ph codeph">-V</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Print version information on this tool.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="generic-tool-options-options-file"><a name="generic-tool-options-options-file" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#generic-tool-options-options-file" name="generic-tool-options-options-file" shape="rect">4.2.8.11.&nbsp;<samp class="ph codeph">--options-file <em class="ph i">file,...</em></samp> (<samp class="ph codeph">-optf</samp>)</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p"><em class="ph i">
                                       Include command line options from specified file.
                                       </em></p>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="phase-options"><a name="phase-options" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#phase-options" name="phase-options" shape="rect">4.2.9.&nbsp;Phase Options</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              The following sections list some useful options for lower-level
                              compilation tools.
                              
                           </p>
                        </div>
                        <div class="topic reference nested3" id="ptxas-options"><a name="ptxas-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options" name="ptxas-options" shape="rect">4.2.9.1.&nbsp;Ptxas Options</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p">
                                    The following sections list some useful <samp class="ph codeph">ptxas</samp> options
                                    which can be specified with <samp class="ph codeph">nvcc</samp> option
                                    <samp class="ph codeph">-Xptxas</samp>.
                                    
                                 </p>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-allow-expensive-optimizations"><a name="ptxas-options-allow-expensive-optimizations" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-allow-expensive-optimizations" name="ptxas-options-allow-expensive-optimizations" shape="rect">4.2.9.1.1.&nbsp;<samp class="ph codeph">--allow-expensive-optimizations</samp> (<samp class="ph codeph">-allow-expensive-optimizations</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Enable (disable) expensive compiler optimizations that
                                          use the maximum available resources (memory and
                                          compile time).
                                          </em></p>
                                    <p class="p">
                                       If unspecified, default behavior is to enable this feature
                                       for optimization level &gt;= <samp class="ph codeph">O2</samp>.
                                       
                                    </p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-compile-only"><a name="ptxas-options-compile-only" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-compile-only" name="ptxas-options-compile-only" shape="rect">4.2.9.1.2.&nbsp;<samp class="ph codeph">--compile-only</samp> (<samp class="ph codeph">-c</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Generate relocatable object.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-def-load-cache"><a name="ptxas-options-def-load-cache" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-def-load-cache" name="ptxas-options-def-load-cache" shape="rect">4.2.9.1.3.&nbsp;<samp class="ph codeph">--def-load-cache</samp> (<samp class="ph codeph">-dlcm</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Default cache modifier on global/generic load.
                                          </em></p>
                                    <p class="p">
                                       Default value: <samp class="ph codeph">ca</samp>.
                                       
                                    </p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-def-store-cache"><a name="ptxas-options-def-store-cache" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-def-store-cache" name="ptxas-options-def-store-cache" shape="rect">4.2.9.1.4.&nbsp;<samp class="ph codeph">--def-store-cache</samp> (<samp class="ph codeph">-dscm</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Default cache modifier on global/generic store.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-device-debug"><a name="ptxas-options-device-debug" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-device-debug" name="ptxas-options-device-debug" shape="rect">4.2.9.1.5.&nbsp;<samp class="ph codeph">--device-debug</samp> (<samp class="ph codeph">-g</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--device-debug</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-disable-optimizer-constants"><a name="ptxas-options-disable-optimizer-constants" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-disable-optimizer-constants" name="ptxas-options-disable-optimizer-constants" shape="rect">4.2.9.1.6.&nbsp;<samp class="ph codeph">--disable-optimizer-constants</samp> (<samp class="ph codeph">-disable-optimizer-consts</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Disable use of optimizer constant bank.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-entry"><a name="ptxas-options-entry" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-entry" name="ptxas-options-entry" shape="rect">4.2.9.1.7.&nbsp;<samp class="ph codeph">--entry <em class="ph i">entry,...</em></samp> (<samp class="ph codeph">-e</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--entries</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-fmad"><a name="ptxas-options-fmad" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-fmad" name="ptxas-options-fmad" shape="rect">4.2.9.1.8.&nbsp;<samp class="ph codeph">--fmad</samp> (<samp class="ph codeph">-fmad</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--fmad</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-force-load-cache"><a name="ptxas-options-force-load-cache" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-force-load-cache" name="ptxas-options-force-load-cache" shape="rect">4.2.9.1.9.&nbsp;<samp class="ph codeph">--force-load-cache</samp> (<samp class="ph codeph">-flcm</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Force specified cache modifier on global/generic load.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-force-store-cache"><a name="ptxas-options-force-store-cache" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-force-store-cache" name="ptxas-options-force-store-cache" shape="rect">4.2.9.1.10.&nbsp;<samp class="ph codeph">--force-store-cache</samp> (<samp class="ph codeph">-fscm</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Force specified cache modifier on global/generic store.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-generate-line-info"><a name="ptxas-options-generate-line-info" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-generate-line-info" name="ptxas-options-generate-line-info" shape="rect">4.2.9.1.11.&nbsp;<samp class="ph codeph">--generate-line-info</samp> (<samp class="ph codeph">-lineinfo</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--generate-line-info</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-gpu-name"><a name="ptxas-options-gpu-name" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-gpu-name" name="ptxas-options-gpu-name" shape="rect">4.2.9.1.12.&nbsp;<samp class="ph codeph">--gpu-name <em class="ph i">gpuname</em></samp> (<samp class="ph codeph">-arch</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Specify name of NVIDIA GPU to generate code for.
                                          </em></p>
                                    <p class="p">
                                       This option also takes virtual compute architectures, in
                                       which case code generation is suppressed.
                                       This can be used for parsing only.
                                       
                                    </p>
                                    <p class="p">
                                       Allowed values for this option:
                                       <samp class="ph codeph">compute_30</samp>,
                                       <span class="ph"><samp class="ph codeph">compute_35</samp>,
                                          <samp class="ph codeph">compute_50</samp>,
                                          <samp class="ph codeph">compute_52</samp>; and
                                          </span><samp class="ph codeph">sm_30</samp>,
                                       <span class="ph"><samp class="ph codeph">sm_32</samp>,
                                          <samp class="ph codeph">sm_35</samp>,
                                          <samp class="ph codeph">sm_50</samp> and
                                          <samp class="ph codeph">sm_52</samp>.
                                          </span></p>
                                    <p class="p">Default value: <samp class="ph codeph">sm_30</samp>.
                                    </p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-help"><a name="ptxas-options-help" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-help" name="ptxas-options-help" shape="rect">4.2.9.1.13.&nbsp;<samp class="ph codeph">--help</samp> (<samp class="ph codeph">-h</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#generic-tool-options" shape="rect">--help</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-machine"><a name="ptxas-options-machine" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-machine" name="ptxas-options-machine" shape="rect">4.2.9.1.14.&nbsp;<samp class="ph codeph">--machine</samp> (<samp class="ph codeph">-m</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--machine</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-maxrregcount"><a name="ptxas-options-maxrregcount" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-maxrregcount" name="ptxas-options-maxrregcount" shape="rect">4.2.9.1.15.&nbsp;<samp class="ph codeph">--maxrregcount <em class="ph i">amount</em></samp> (<samp class="ph codeph">-maxrregcount</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--maxrregcount</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-opt-level"><a name="ptxas-options-opt-level" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-opt-level" name="ptxas-options-opt-level" shape="rect">4.2.9.1.16.&nbsp;<samp class="ph codeph">--opt-level <em class="ph i">N</em></samp> (<samp class="ph codeph">-O</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Specify optimization level.
                                          </em></p>
                                    <p class="p">
                                       Default value: <samp class="ph codeph">3</samp>.
                                       
                                    </p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-options-file"><a name="ptxas-options-options-file" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-options-file" name="ptxas-options-options-file" shape="rect">4.2.9.1.17.&nbsp;<samp class="ph codeph">--options-file <em class="ph i">file,...</em></samp> (<samp class="ph codeph">-optf</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#generic-tool-options" shape="rect">--options-file</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-output-file"><a name="ptxas-options-output-file" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-output-file" name="ptxas-options-output-file" shape="rect">4.2.9.1.18.&nbsp;<samp class="ph codeph">--output-file <em class="ph i">file</em></samp> (<samp class="ph codeph">-o</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Specify name of output file.
                                          </em></p>
                                    <p class="p">
                                       Default value: <samp class="ph codeph">elf.o</samp>.
                                       
                                    </p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-preserve-relocs"><a name="ptxas-options-preserve-relocs" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-preserve-relocs" name="ptxas-options-preserve-relocs" shape="rect">4.2.9.1.19.&nbsp;<samp class="ph codeph">--preserve-relocs</samp> (<samp class="ph codeph">-preserve-relocs</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          This option makes <samp class="ph codeph">ptxas</samp> generate
                                          relocatable references for variables and preserve the
                                          relocations generated for them in the linked executable.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-sp-bound-check"><a name="ptxas-options-sp-bound-check" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-sp-bound-check" name="ptxas-options-sp-bound-check" shape="rect">4.2.9.1.20.&nbsp;<samp class="ph codeph">--sp-bound-check</samp> (<samp class="ph codeph">-sp-bound-check</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Generate stack-pointer bounds-checking code sequence.
                                          </em></p>
                                    <p class="p">
                                       This option is turned on automatically when
                                       <samp class="ph codeph"><a class="xref" href="index.html#ptxas-options" shape="rect">--device-debug</a></samp>
                                       or
                                       <samp class="ph codeph"><a class="xref" href="index.html#ptxas-options" shape="rect">--opt-level</a>=0</samp>
                                       is specified.
                                       
                                    </p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-verbose"><a name="ptxas-options-verbose" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-verbose" name="ptxas-options-verbose" shape="rect">4.2.9.1.21.&nbsp;<samp class="ph codeph">--verbose</samp> (<samp class="ph codeph">-v</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Enable verbose mode which prints code generation statistics.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-version"><a name="ptxas-options-version" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-version" name="ptxas-options-version" shape="rect">4.2.9.1.22.&nbsp;<samp class="ph codeph">--version</samp> (<samp class="ph codeph">-V</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Semantics same as <samp class="ph codeph">nvcc</samp> option
                                          <samp class="ph codeph"><a class="xref" href="index.html#generic-tool-options" shape="rect">--version</a></samp>.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-warning-as-error"><a name="ptxas-options-warning-as-error" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-warning-as-error" name="ptxas-options-warning-as-error" shape="rect">4.2.9.1.23.&nbsp;<samp class="ph codeph">--warning-as-error</samp> (<samp class="ph codeph">-Werror</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Make all warnings into errors.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-warn-on-double-precision-use"><a name="ptxas-options-warn-on-double-precision-use" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-warn-on-double-precision-use" name="ptxas-options-warn-on-double-precision-use" shape="rect">4.2.9.1.24.&nbsp;<samp class="ph codeph">--warn-on-double-precision-use</samp> (<samp class="ph codeph">-warn-double-usage</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Warn if double(s) are used in an instruction.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-warn-on-local-memory-usage"><a name="ptxas-options-warn-on-local-memory-usage" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-warn-on-local-memory-usage" name="ptxas-options-warn-on-local-memory-usage" shape="rect">4.2.9.1.25.&nbsp;<samp class="ph codeph">--warn-on-local-memory-usage</samp> (<samp class="ph codeph">-warn-lmem-usage</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Warn if local memory is used.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="ptxas-options-warn-on-spills"><a name="ptxas-options-warn-on-spills" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#ptxas-options-warn-on-spills" name="ptxas-options-warn-on-spills" shape="rect">4.2.9.1.26.&nbsp;<samp class="ph codeph">--warn-on-spills</samp> (<samp class="ph codeph">-warn-spills</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Warn if registers are spilled to local memory.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                        </div>
                        <div class="topic reference nested3" id="nvlink-options"><a name="nvlink-options" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#nvlink-options" name="nvlink-options" shape="rect">4.2.9.2.&nbsp;NVLINK Options</a></h3>
                           <div class="body refbody">
                              <div class="section">
                                 <p class="p">
                                    The following table lists some useful <samp class="ph codeph">nvlink</samp> options
                                    which can be specified with <samp class="ph codeph">nvcc</samp> option
                                    <samp class="ph codeph"><a class="xref" href="index.html#options-for-passing-specific-phase-options" shape="rect">--nvlink-options</a></samp>.
                                    
                                 </p>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="nvlink-options-disable-warnings"><a name="nvlink-options-disable-warnings" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#nvlink-options-disable-warnings" name="nvlink-options-disable-warnings" shape="rect">4.2.9.2.1.&nbsp;<samp class="ph codeph">--disable-warnings</samp> (<samp class="ph codeph">-w</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Inhibit all warning messages.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="nvlink-options-preserve-relocs"><a name="nvlink-options-preserve-relocs" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#nvlink-options-preserve-relocs" name="nvlink-options-preserve-relocs" shape="rect">4.2.9.2.2.&nbsp;<samp class="ph codeph">--preserve-relocs</samp> (<samp class="ph codeph">-preserve-relocs</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Preserve resolved relocations in linked executable.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="nvlink-options-verbose"><a name="nvlink-options-verbose" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#nvlink-options-verbose" name="nvlink-options-verbose" shape="rect">4.2.9.2.3.&nbsp;<samp class="ph codeph">--verbose</samp> (<samp class="ph codeph">-v</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Enable verbose mode which prints code generation statistics.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                           <div class="topic reference nested4" id="nvlink-options-warning-as-error"><a name="nvlink-options-warning-as-error" shape="rect">
                                 <!-- --></a><h3 class="title topictitle2"><a href="#nvlink-options-warning-as-error" name="nvlink-options-warning-as-error" shape="rect">4.2.9.2.4.&nbsp;<samp class="ph codeph">--warning-as-error</samp> (<samp class="ph codeph">-Werror</samp>)</a></h3>
                              <div class="body refbody">
                                 <div class="section">
                                    <p class="p"><em class="ph i">
                                          Make all warnings into errors.
                                          </em></p>
                                 </div>
                              </div>
                           </div>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="gpu-compilation"><a name="gpu-compilation" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#gpu-compilation" name="gpu-compilation" shape="rect">5.&nbsp;GPU Compilation</a></h2>
                  <div class="body conbody">
                     <p class="p">
                        This chapter describes the GPU compilation model that is maintained by
                        <samp class="ph codeph">nvcc</samp>, in cooperation with the CUDA driver.
                        It goes through some technical sections, with concrete examples at the
                        end.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="gpu-generations"><a name="gpu-generations" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#gpu-generations" name="gpu-generations" shape="rect">5.1.&nbsp;GPU Generations</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           In order to allow for architectural evolution, NVIDIA GPUs are released
                           in different generations.
                           New generations introduce major improvements in functionality and/or
                           chip architecture, while GPU models within the same generation show
                           minor configuration differences that <em class="ph i">moderately</em> affect
                           functionality, performance, or both.
                           
                        </p>
                        <p class="p">
                           Binary compatibility of GPU applications is not guaranteed across
                           different generations.
                           For example, a CUDA application that has been compiled for a Fermi GPU
                           will very likely not run on a Kepler GPU (and vice versa).
                           This is because the instruction set and instruction encodings of a
                           generation are different from those of other generations.
                           
                        </p>
                        <p class="p">
                           Binary compatibility within one GPU generation can be guaranteed under
                           certain conditions because they share the basic instruction set.
                           This is the case between two GPU versions that do not show functional
                           differences at all (for instance when one version is a scaled down
                           version of the other), or when one version is functionally included in
                           the other.
                           An example of the latter is the <em class="ph i">base</em> Kepler version
                           <samp class="ph codeph">sm_30</samp> whose functionality is a subset of all other
                           Kepler versions: any code compiled for <samp class="ph codeph">sm_30</samp> will run
                           on all other Kepler GPUs.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="gpu-feature-list"><a name="gpu-feature-list" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#gpu-feature-list" name="gpu-feature-list" shape="rect">5.2.&nbsp;GPU Feature List</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The following table lists the names of the current GPU architectures,
                           annotated with the functional capabilities that they provide.
                           There are other differences, such as the number of registers and
                           processor clusters, that only affect execution performance.
                           
                        </p>
                        <p class="p">
                           In the CUDA naming scheme, GPUs are named <samp class="ph codeph">sm_xy</samp>, where
                           <samp class="ph codeph">x</samp> denotes the GPU generation number, and
                           <samp class="ph codeph">y</samp> the version in that generation.
                           Additionally, to facilitate comparing GPU capabilities, CUDA attempts to
                           choose its GPU names such that if
                           <samp class="ph codeph">x<sub class="ph sub">1</sub>y<sub class="ph sub">1</sub></samp> &lt;=
                           <samp class="ph codeph">x<sub class="ph sub">2</sub>y<sub class="ph sub">2</sub></samp> then all non-ISA related
                           capabilities of <samp class="ph codeph">sm_x<sub class="ph sub">1</sub>y<sub class="ph sub">1</sub></samp> are
                           included in those of <samp class="ph codeph">sm_x<sub class="ph sub">2</sub>y<sub class="ph sub">2</sub></samp>.
                           From this it indeed follows that <samp class="ph codeph">sm_30</samp> is the
                           <em class="ph i">base</em> Kepler model, and it also explains why higher entries in the
                           tables are always functional extensions to the lower entries.
                           This is denoted by the plus sign in the table.
                           Moreover, if we abstract from the instruction encoding, it implies that
                           <samp class="ph codeph">sm_30</samp>'s functionality will continue to be included in
                           all later GPU generations.
                           As we will see next, this property will be the foundation for
                           application compatibility support by <samp class="ph codeph">nvcc</samp>.
                           
                        </p>
                        <div class="tablenoborder">
                           <table cellpadding="4" cellspacing="0" summary="" class="table" frame="border" border="1" rules="all">
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">sm_30</samp> and <samp class="ph codeph">sm_32</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">
                                       <p class="p">Basic features</p>
                                       <p class="p">+ Kepler support</p>
                                       <p class="p">+ Unified memory programming</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">sm_35</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Dynamic parallelism support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">sm_50</samp>, <samp class="ph codeph">sm_52</samp>, and
                                       <samp class="ph codeph">sm_53</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Maxwell support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">sm_60</samp>, <samp class="ph codeph">sm_61</samp>, and
                                       <samp class="ph codeph">sm_62</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Pascal support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">sm_70</samp> and <samp class="ph codeph">sm_72</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Volta support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">sm_75</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Turing support</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="application-compatibility"><a name="application-compatibility" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#application-compatibility" name="application-compatibility" shape="rect">5.3.&nbsp;Application Compatibility</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           Binary code compatibility over CPU generations, together with a
                           published instruction set architecture, is the usual mechanism for
                           ensuring that distributed applications <em class="ph i">out there in the field</em>
                           will continue to run on newer versions of the CPU when these become
                           mainstream.
                           
                        </p>
                        <p class="p">
                           This situation is different for GPUs, because NVIDIA cannot guarantee
                           binary compatibility without sacrificing regular opportunities for GPU
                           improvements.
                           Rather, as is already conventional in the graphics programming domain,
                           <samp class="ph codeph">nvcc</samp> relies on a two stage compilation model for
                           ensuring application compatibility with future GPU generations.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="virtual-architectures"><a name="virtual-architectures" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#virtual-architectures" name="virtual-architectures" shape="rect">5.4.&nbsp;Virtual Architectures</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           GPU compilation is performed via an intermediate representation, PTX,
                           which can be considered as assembly for a virtual GPU architecture.
                           Contrary to an actual graphics processor, such a virtual GPU is defined
                           entirely by the set of capabilities, or features, that it provides to
                           the application.
                           In particular, a virtual GPU architecture provides a (largely) generic
                           instruction set, and binary instruction encoding is a non-issue because
                           PTX programs are always represented in text format.
                           
                        </p>
                        <p class="p">
                           Hence, an <samp class="ph codeph">nvcc</samp> compilation command always uses two
                           architectures: a <em class="ph i">virtual</em> intermediate architecture, plus a
                           <em class="ph i">real</em> GPU architecture to specify the intended processor to
                           execute on.
                           For such an <samp class="ph codeph">nvcc</samp> command to be valid, the <em class="ph i">real</em>
                           architecture must be an implementation of the <em class="ph i">virtual</em>
                           architecture.
                           This is further explained below.
                           
                        </p>
                        <p class="p">
                           The chosen virtual architecture is more of a statement on the GPU
                           capabilities that the application requires: using the <em class="ph i">smallest</em>
                           virtual architecture still allows the <em class="ph i">widest</em> range of actual
                           architectures for the second <samp class="ph codeph">nvcc</samp> stage.
                           Conversely, specifying a virtual architecture that provides features
                           unused by the application unnecessarily restricts the set of possible
                           GPUs that can be specified in the second <samp class="ph codeph">nvcc</samp> stage.
                           
                        </p>
                        <p class="p">
                           From this it follows that the virtual architecture should always be
                           chosen as <em class="ph i">low</em> as possible, thereby maximizing the actual GPUs to
                           run on.
                           The <em class="ph i">real</em> architecture should be chosen as <em class="ph i">high</em> as
                           possible (assuming that this always generates better code), but this is
                           only possible with knowledge of the actual GPUs on which the application
                           is expected to run.
                           As we will see later, this is the case with just in time compilation,
                           where the driver has this exact knowledge: the runtime GPU is the one on
                           which the program is about to be launched/executed.
                           
                        </p>
                        <div class="fig fignone" id="virtual-architectures__virtual-architectures"><a name="virtual-architectures__virtual-architectures" shape="rect">
                              <!-- --></a><span class="figcap">Figure 2. Two-Staged Compilation with Virtual and Real Architectures</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/virtual-architectures.png" alt="Virtual compute architecture and Real sm architecture."></img></div><br clear="none"></br></div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="virtual-architecture-feature-list"><a name="virtual-architecture-feature-list" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#virtual-architecture-feature-list" name="virtual-architecture-feature-list" shape="rect">5.5.&nbsp;Virtual Architecture Feature List</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="virtual-architecture-feature-list__virtual-architecture-features" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="virtual-architecture-feature-list__virtual-architecture-features" class="table" frame="border" border="1" rules="all">
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">compute_30</samp> and <samp class="ph codeph">compute_32</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">
                                       <p class="p">Basic features</p>
                                       <p class="p">+ Kepler support</p>
                                       <p class="p">+ Unified memory programming</p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">compute_35</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Dynamic parallelism support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">compute_50</samp>,
                                       <samp class="ph codeph">compute_52</samp>, and
                                       <samp class="ph codeph">compute_53</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Maxwell support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">compute_60</samp>,
                                       <samp class="ph codeph">compute_61</samp>, and
                                       <samp class="ph codeph">compute_62</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Pascal support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">compute_70</samp> and <samp class="ph codeph">compute_72</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Volta support</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="25%" rowspan="1" colspan="1"><samp class="ph codeph">compute_75</samp></td>
                                    <td class="entry" valign="top" width="75%" rowspan="1" colspan="1">+ Turing support</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                        <p class="p">
                           The above table lists the currently defined virtual architectures.
                           The virtual architecture naming scheme is the same as the real
                           architecture naming scheme shown in Section
                           <a class="xref" href="index.html#gpu-feature-list" shape="rect">GPU Feature List</a>.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="further-mechanisms"><a name="further-mechanisms" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#further-mechanisms" name="further-mechanisms" shape="rect">5.6.&nbsp;Further Mechanisms</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           Clearly, compilation staging in itself does not help towards the goal of
                           application compatibility with future GPUs.
                           For this we need two other mechanisms: just in time
                           compilation (JIT) and fatbinaries.
                           
                        </p>
                     </div>
                     <div class="topic concept nested2" id="just-in-time-compilation"><a name="just-in-time-compilation" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#just-in-time-compilation" name="just-in-time-compilation" shape="rect">5.6.1.&nbsp;Just-in-Time Compilation</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              The compilation step to an actual GPU binds the code to one generation
                              of GPUs.
                              Within that generation, it involves a choice between GPU <em class="ph i">coverage</em>
                              and possible performance.
                              For example, compiling to <samp class="ph codeph">sm_30</samp> allows the code to run
                              on all Kepler-generation GPUs, but compiling to <samp class="ph codeph">sm_35</samp>
                              would probably yield better code if Kepler GK110 and later are the only
                              targets.
                              
                           </p>
                           <div class="fig fignone" id="just-in-time-compilation__just-in-time-compilation"><a name="just-in-time-compilation__just-in-time-compilation" shape="rect">
                                 <!-- --></a><span class="figcap">Figure 3. Just-in-Time Compilation of Device Code</span><br clear="none"></br><div class="imagecenter"><img class="image imagecenter" src="graphics/just-in-time-compilation.png" alt="Just in time compilation."></img></div><br clear="none"></br></div>
                           <p class="p">
                              By specifying a virtual code architecture instead of a <em class="ph i">real</em> GPU,
                              <samp class="ph codeph">nvcc</samp> postpones the assembly of PTX code until
                              application runtime, at which point the target GPU is exactly known.
                              For instance, the command below allows generation of exactly matching
                              GPU binary code, when the application is launched on an
                              <samp class="ph codeph">sm_50</samp> or later architecture.
                              
                           </p><pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_50 --gpu-code=compute_50</pre><p class="p">
                              The disadvantage of just in time compilation is increased application
                              startup delay, but this can be alleviated by letting the CUDA driver use
                              a compilation cache (refer to "Section 3.1.1.2. Just-in-Time
                              Compilation" of
                              <a class="xref" href="http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html" target="_blank" shape="rect">
                                 CUDA C Programming Guide</a>)
                              which is persistent over multiple runs of the application.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="fatbinaries"><a name="fatbinaries" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#fatbinaries" name="fatbinaries" shape="rect">5.6.2.&nbsp;Fatbinaries</a></h3>
                        <div class="body conbody">
                           <div class="p">
                              A different solution to overcome startup delay by JIT while still
                              allowing execution on newer GPUs is to specify multiple code instances,
                              as in
                              <pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_52</pre></div>
                           <p class="p">
                              This command generates exact code for two Maxwell variants, plus PTX code
                              for use by JIT in case a next-generation GPU is encountered.
                              <samp class="ph codeph">nvcc</samp> organizes its device code in fatbinaries, which
                              are able to hold multiple translations of the same GPU source code.
                              At runtime, the CUDA driver will select the most appropriate translation
                              when the device function is launched.
                              
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvcc-examples"><a name="nvcc-examples" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvcc-examples" name="nvcc-examples" shape="rect">5.7.&nbsp;NVCC Examples</a></h3>
                     <div class="topic concept nested2" id="base-notation"><a name="base-notation" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#base-notation" name="base-notation" shape="rect">5.7.1.&nbsp;Base Notation</a></h3>
                        <div class="body conbody">
                           <p class="p"><samp class="ph codeph">nvcc</samp> provides the options
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                              and
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                              for specifying the target architectures for both translation stages.
                              Except for the shorthands described below, the
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                              option takes a single value, which must be the
                              name of a virtual compute architecture, while option
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                              takes a list of values which must all be the
                              names of actual GPUs.
                              <samp class="ph codeph">nvcc</samp> performs a stage 2 translation for each of these
                              GPUs, and will embed the result in the result of compilation (which
                              usually is a host object file or executable).
                              
                           </p>
                           <div class="example">
                              <h4 class="title sectiontitle">Example</h4><pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_50 --gpu-code=sm_50,sm_52</pre></div>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="shorthand"><a name="shorthand" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#shorthand" name="shorthand" shape="rect">5.7.2.&nbsp;Shorthand</a></h3>
                        <div class="body conbody">
                           <p class="p"><samp class="ph codeph">nvcc</samp> allows a number of shorthands for simple cases.
                              
                           </p>
                        </div>
                        <div class="topic concept nested3" id="shorthand-1"><a name="shorthand-1" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#shorthand-1" name="shorthand-1" shape="rect">5.7.2.1.&nbsp;Shorthand 1</a></h3>
                           <div class="body conbody">
                              <p class="p"><samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                                 arguments can be virtual architectures.
                                 In this case the stage 2 translation will be omitted for such virtual
                                 architecture, and the stage 1 PTX result will be embedded instead.
                                 At application launch, and in case the driver does not find a better
                                 alternative, the stage 2 compilation will be invoked by the driver
                                 with the PTX as input.
                                 
                              </p>
                              <div class="example">
                                 <h5 class="title sectiontitle">Example</h5><pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_52</pre></div>
                           </div>
                        </div>
                        <div class="topic concept nested3" id="shorthand-2"><a name="shorthand-2" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#shorthand-2" name="shorthand-2" shape="rect">5.7.2.2.&nbsp;Shorthand 2</a></h3>
                           <div class="body conbody">
                              <p class="p">
                                 The
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                                 option can be omitted.
                                 Only in this case, the
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                                 value can be a non-virtual architecture.
                                 The
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                                 values default to the <em class="ph i">closest</em> virtual
                                 architecture that is implemented by the GPU specified with
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>,
                                 plus the
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                                 value itself.
                                 The <em class="ph i">closest</em> virtual architecture is used as the effective
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                                 value.
                                 If the 
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                                 value is a virtual architecture, it is also used as the effective
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                                 value.
                                 
                              </p>
                              <div class="example">
                                 <h5 class="title sectiontitle">Example</h5>
                                 <div class="p"><pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=sm_52
nvcc x.cu --gpu-architecture=compute_50</pre>
                                    
                                    are equivalent to
                                    <pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_52 --gpu-code=sm_52,compute_52
nvcc x.cu --gpu-architecture=compute_50 --gpu-code=compute_50</pre></div>
                              </div>
                           </div>
                        </div>
                        <div class="topic concept nested3" id="shorthand-3"><a name="shorthand-3" shape="rect">
                              <!-- --></a><h3 class="title topictitle2"><a href="#shorthand-3" name="shorthand-3" shape="rect">5.7.2.3.&nbsp;Shorthand 3</a></h3>
                           <div class="body conbody">
                              <p class="p">
                                 Both
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                                 and
                                 <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                                 options can be omitted.
                                 
                              </p>
                              <div class="example">
                                 <h5 class="title sectiontitle">Example</h5>
                                 <div class="p"><pre class="pre screen" xml:space="preserve">nvcc x.cu</pre>
                                    is equivalent to
                                    <pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_30 --gpu-code=sm_30,compute_30</pre></div>
                              </div>
                           </div>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="extended-notation"><a name="extended-notation" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#extended-notation" name="extended-notation" shape="rect">5.7.3.&nbsp;Extended Notation</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              The options
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                              and
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                              can be used in all cases where code is to be generated for one or more
                              GPUs using a common virtual architecture.
                              This will cause a single invocation of <samp class="ph codeph">nvcc</samp> stage 1
                              (that is, preprocessing and generation of virtual PTX assembly code),
                              followed by a compilation stage 2 (binary code generation) repeated for
                              each specified GPU.
                              
                           </p>
                           <p class="p">
                              Using a common virtual architecture means that all assumed GPU features
                              are fixed for the entire <samp class="ph codeph">nvcc</samp> compilation.
                              For instance, the following <samp class="ph codeph">nvcc</samp> command assumes no
                              half-precision floating-point operation support for both the
                              <samp class="ph codeph">sm_50</samp> code and the <samp class="ph codeph">sm_53</samp> code:
                              
                           </p><pre class="pre screen" xml:space="preserve">nvcc x.cu --gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_53</pre><p class="p">
                              Sometimes it is necessary to perform different GPU code generation
                              steps, partitioned over different architectures.
                              This is possible using <samp class="ph codeph">nvcc</samp> option
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--generate-code</a></samp>,
                              which then must be used instead of a
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>
                              and
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-code</a></samp>
                              combination.
                              
                           </p>
                           <p class="p">
                              Unlike option
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--gpu-architecture</a></samp>,
                              option
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--generate-code</a></samp>
                              may be repeated on the <samp class="ph codeph">nvcc</samp> command line.
                              It takes sub-options <samp class="ph codeph">arch</samp> and <samp class="ph codeph">code</samp>,
                              which must not be confused with their main option equivalents, but
                              behave similarly.
                              If repeated architecture compilation is used, then the device code must
                              use conditional compilation based on the value of the architecture
                              identification macro <samp class="ph codeph">__CUDA_ARCH__</samp>, which is described
                              in the next section.
                              
                           </p>
                           <p class="p">
                              For example, the following assumes absence of half-precision
                              floating-point operation support for
                              the <samp class="ph codeph">sm_50</samp> and <samp class="ph codeph">sm_52</samp> code, but full
                              support on <samp class="ph codeph">sm_53</samp>:
                              
                           </p><pre class="pre screen" xml:space="preserve">nvcc x.cu \
    --generate-code arch=compute_50,code=sm_50 \
    --generate-code arch=compute_50,code=sm_52 \
    --generate-code arch=compute_53,code=sm_53</pre><p class="p">
                              Or, leaving actual GPU code generation to the JIT compiler in the CUDA
                              driver:
                              
                           </p><pre class="pre screen" xml:space="preserve">nvcc x.cu \
    --generate-code arch=compute_50,code=compute_50 \
    --generate-code arch=compute_53,code=compute_53</pre><p class="p">
                              The code sub-options can be combined with a slightly more complex syntax:
                              
                           </p><pre class="pre screen" xml:space="preserve">nvcc x.cu \
    --generate-code arch=compute_50,code=[sm_50,sm_52] \
    --generate-code arch=compute_53,code=sm_53</pre></div>
                     </div>
                     <div class="topic concept nested2" id="virtual-architecture-identification-macro"><a name="virtual-architecture-identification-macro" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#virtual-architecture-identification-macro" name="virtual-architecture-identification-macro" shape="rect">5.7.4.&nbsp;Virtual Architecture Identification Macro</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              The architecture identification macro <samp class="ph codeph">__CUDA_ARCH__</samp>
                              is assigned a three-digit value string <samp class="ph codeph">xy0</samp> (ending in
                              a literal <samp class="ph codeph">0</samp>) during each <samp class="ph codeph">nvcc</samp>
                              compilation stage 1 that compiles for <samp class="ph codeph">compute_xy</samp>.
                              
                           </p>
                           <p class="p">
                              This macro can be used in the implementation of GPU functions for
                              determining the virtual architecture for which it is currently being
                              compiled.
                              The host code (the non-GPU code) must <em class="ph i">not</em> depend on it.
                              
                           </p>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="using-separate-compilation-in-cuda"><a name="using-separate-compilation-in-cuda" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#using-separate-compilation-in-cuda" name="using-separate-compilation-in-cuda" shape="rect">6.&nbsp;Using Separate Compilation in CUDA</a></h2>
                  <div class="body conbody">
                     <p class="p">
                        Prior to the 5.0 release, CUDA did not support separate compilation, so
                        CUDA code could not call device functions or access variables across
                        files.
                        Such compilation is referred to as <em class="ph i">whole program compilation</em>.
                        Separate compilation of host code has always been supported; it was
                        only the device code that needed to be entirely within one file.
                        Starting with CUDA 5.0, separate compilation of device code is
                        supported, but the old whole program mode is still the default, so there
                        are new options to invoke separate compilation.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="code-changes-for-separate-compilation"><a name="code-changes-for-separate-compilation" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#code-changes-for-separate-compilation" name="code-changes-for-separate-compilation" shape="rect">6.1.&nbsp;Code Changes for Separate Compilation</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The code changes required for separate compilation of device code are
                           the same as what you already do for host code, namely using
                           <samp class="ph codeph">extern</samp> and <samp class="ph codeph">static</samp> to control the
                           visibility of symbols.
                           Note that previously <samp class="ph codeph">extern</samp> was ignored in CUDA code;
                           now it will be honored.
                           With the use of <samp class="ph codeph">static</samp> it is possible to have multiple
                           device symbols with the same name in different files.
                           For this reason, the CUDA API calls that referred to symbols by their
                           string name are deprecated; instead the symbol should be referenced by
                           its address.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvcc-options-for-separate-compilation"><a name="nvcc-options-for-separate-compilation" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvcc-options-for-separate-compilation" name="nvcc-options-for-separate-compilation" shape="rect">6.2.&nbsp;NVCC Options for Separate Compilation</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           CUDA works by embedding device code into host objects.
                           In whole program compilation, it embeds executable device code into the
                           host object.
                           In separate compilation, we embed relocatable device code into the host
                           object, and run <samp class="ph codeph">nvlink</samp>, the device linker, to link all
                           the device code together.
                           The output of nvlink is then linked together with all the host objects
                           by the host linker to form the final executable.
                           
                        </p>
                        <p class="p">
                           The generation of relocatable vs executable device code is controlled by
                           the
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--relocatable-device-code</a></samp>
                           option.
                           
                        </p>
                        <p class="p">
                           The
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--compile</a></samp>
                           option is already used to control stopping a
                           compile at a host object, so a new option
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--device-c</a></samp>
                           is added that simply does
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--relocatable-device-code</a>=true
                              <a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--compile</a></samp>.
                           
                        </p>
                        <div class="p">
                           To invoke just the device linker, the
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--device-link</a></samp>
                           option can be used, which emits a host object
                           containing the embedded executable device code.
                           The output of that must then be passed to the host linker.
                           Or:
                           <pre class="pre screen" xml:space="preserve">nvcc &lt;objects&gt;</pre>
                           
                           can be used to implicitly call both the device and host linkers.
                           This works because if the device linker does not see any relocatable
                           code it does not do anything.
                           </div>
                        <p class="p"><a class="xref" href="index.html#nvcc-options-for-separate-compilation__nvcc-options-for-separate-compilation" shape="rect">Figure 4</a>
                           shows the flow (<samp class="ph codeph">nvcc --device-c</samp>
                           has the same flow as
                           <a class="xref" href="index.html#options-for-specifying-compilation-phase-compile__cuda-compilation-from-cu-to-o" shape="rect">CUDA compilation from .cu to .o</a>).
                           
                        </p>
                        <div class="fig fignone" id="nvcc-options-for-separate-compilation__nvcc-options-for-separate-compilation"><a name="nvcc-options-for-separate-compilation__nvcc-options-for-separate-compilation" shape="rect">
                              <!-- --></a><span class="figcap">Figure 4. CUDA Separate Compilation Trajectory</span><img class="image" src="graphics/nvcc-options-for-separate-compilation.png" alt="Flow diagram of nvcc options for separate compilation"></img></div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="libraries"><a name="libraries" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#libraries" name="libraries" shape="rect">6.3.&nbsp;Libraries</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The device linker has the ability to read the static host library
                           formats (<samp class="ph codeph">.a</samp> on Linux and Mac OS X,
                           <samp class="ph codeph">.lib</samp> on Windows).
                           It ignores any dynamic (<samp class="ph codeph">.so</samp> or <samp class="ph codeph">.dll</samp>)
                           libraries.
                           The
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--library</a></samp>
                           and
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--library-path</a></samp>
                           options can be used to
                           pass libraries to both the device and host linker.
                           The library name is specified without the library file extension when
                           the
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--library</a></samp>
                           option is used.
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 a.o b.o --library-path=&lt;path&gt; --library=foo</pre><p class="p">
                           Alternatively, the library name, including the library file extension,
                           can be used without the
                           <samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--library</a></samp>
                           option on Windows.
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 a.obj b.obj foo.lib --library-path=&lt;path&gt;</pre><p class="p">
                           Note that the device linker ignores any objects that do not have
                           relocatable device code.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="examples"><a name="examples" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#examples" name="examples" shape="rect">6.4.&nbsp;Examples</a></h3>
                     <div class="body conbody">
                        <p class="p">Suppose we have the following files:</p><pre xml:space="preserve"><span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-comment">//---------- b.h ----------</span>
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#define N 8</span>

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">extern</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> g[N];

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">extern</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> bar(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>);</pre><pre xml:space="preserve"><span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-comment">//---------- b.cu ----------</span>
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#include "b.h"</span>

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> g[N];

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> bar (<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>)
{
  g[<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">threadIdx</span>.x]++;
}</pre><pre xml:space="preserve"><span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-comment">//---------- a.cu ----------</span>
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#include &lt;stdio.h&gt;</span>
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#include "b.h"</span>

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__global__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span> foo (<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {

  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__shared__</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> a[N];
  a[<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">threadIdx</span>.x] = <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">threadIdx</span>.x;

  __syncthreads();

  g[<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">threadIdx</span>.x] = a[<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">blockDim</span>.x - <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">threadIdx</span>.x - 1];

  bar();
}

<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> main (<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>) {
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">unsigned</span> <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> i;
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> *dg, hg[N];
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span> sum = 0;

  foo<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">&lt;&lt;&lt;</span>1, N<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">&gt;&gt;&gt;</span>();

  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">if</span>(cudaGetSymbolAddress((<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>**)&amp;dg, g)){
      printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"couldn't get the symbol addr\n"</span>);
      <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">return</span> 1;
  }
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">if</span>(cudaMemcpy(hg, dg, N * <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">sizeof</span>(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">int</span>), cudaMemcpyDeviceToHost)){
      printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"couldn't memcpy\n"</span>);
      <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">return</span> 1;
  }

  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">for</span> (i = 0; i &lt; N; i++) {
    sum += hg[i];
  }
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">if</span> (sum == 36) {
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"PASSED\n"</span>);
  } <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">else</span> {
    printf(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-string">"FAILED (%d)\n"</span>, sum);
  }

  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">return</span> 0;
}</pre><p class="p">
                           These can be compiled with the following commands (these examples are
                           for Linux):
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 --device-c a.cu b.cu
nvcc --gpu-architecture=sm_50 a.o b.o</pre><p class="p">
                           If you want to invoke the device and host linker separately, you can
                           do:
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 --device-c a.cu b.cu
nvcc --gpu-architecture=sm_50 --device-link a.o b.o --output-file link.o
g++ a.o b.o link.o --library-path=&lt;path&gt; --library=cudart</pre><p class="p">
                           Note that all desired target architectures must be passed to the 
                           device linker, as that specifies what will be in the final executable
                           (some objects or libraries may contain device code for multiple architectures,
                           and the link step can then choose what to put in the final executable).
                           
                        </p>
                        <p class="p">
                           If you want to use the driver API to load a linked cubin, you can
                           request just the cubin:
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 --device-link a.o b.o \
    --cubin --output-file link.cubin</pre><p class="p">The objects could be put into a library and used with:</p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 --device-c a.cu b.cu
nvcc --lib a.o b.o --output-file test.a
nvcc --gpu-architecture=sm_50 test.a</pre><p class="p">
                           Note that only static libraries are supported by the device linker.
                           
                        </p>
                        <p class="p">
                           A PTX file can be compiled to a host object file and then linked by
                           using:
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 --device-c a.ptx</pre><p class="p">
                           An example that uses libraries, host linker, and dynamic parallelism
                           would be:
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=sm_50 --device-c a.cu b.cu
nvcc --gpu-architecture=sm_50 --device-link a.o b.o --output-file link.o
nvcc --lib --output-file libgpu.a a.o b.o link.o
g++ host.o --library=gpu --library-path=&lt;path&gt; \
    --library=cudadevrt --library=cudart</pre><p class="p">
                           It is possible to do multiple device links within a single host
                           executable, as long as each device link is independent of the other.
                           This requirement of independence means that they cannot share code
                           across device executables, nor can they share addresses (e.g., a
                           device function address can be passed from host to device for a
                           callback only if the device link sees both the caller and potential
                           callback callee; you cannot pass an address from one device executable
                           to another, as those are separate address spaces).
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="potential-separate-compilation-issues"><a name="potential-separate-compilation-issues" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#potential-separate-compilation-issues" name="potential-separate-compilation-issues" shape="rect">6.5.&nbsp;Potential Separate Compilation Issues</a></h3>
                     <div class="topic concept nested2" id="object-compatibility"><a name="object-compatibility" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#object-compatibility" name="object-compatibility" shape="rect">6.5.1.&nbsp;Object Compatibility</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              Only relocatable device code with the same ABI version, 
                              link-compatible SM target
                              architecture, and same pointer size (32 or 64) can be linked together.
                              Incompatible objects will produce a link error.
                              Link-compatible SM architectures are ones that have compatible SASS
                              binaries that can combine without translating, e.g. sm_52 and sm_50.
                              An object could have been compiled for a different architecture but also
                              have PTX available, in which case the device linker will JIT the PTX to
                              cubin for the desired architecture and then link.
                              Relocatable device code requires CUDA Toolkit 5.0 or later.
                              
                           </p>
                           <p class="p">
                              If a kernel is limited to a certain number of registers with the
                              <samp class="ph codeph">__launch_bounds__</samp> attribute or the
                              <samp class="ph codeph"><a class="xref" href="index.html#options-for-steering-gpu-code-generation" shape="rect">--maxrregcount</a></samp>
                              option, then all functions that the kernel calls must not use more than
                              that number of registers; if they exceed the limit, then a link error
                              will be given.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="jit-linking-not-supported"><a name="jit-linking-not-supported" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#jit-linking-not-supported" name="jit-linking-not-supported" shape="rect">6.5.2.&nbsp;JIT Linking Support</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              CUDA 5.0 does not support JIT linking, while CUDA 5.5 does.
                              This means that to use JIT linking you must recompile your code with
                              CUDA 5.5 or later.
                              JIT linking means doing a relink of the code at startup time.
                              The device linker (<samp class="ph codeph">nvlink</samp>) links at the cubin level.
                              If the cubin does not match the target architecture at load time, the
                              driver re-invokes the device linker to generate cubin for the target
                              architecture, by first JIT'ing the PTX for each object to the
                              appropriate cubin, and then linking together the new cubin.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="implicit-cuda-host-code"><a name="implicit-cuda-host-code" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#implicit-cuda-host-code" name="implicit-cuda-host-code" shape="rect">6.5.3.&nbsp;Implicit CUDA Host Code</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              A file like <samp class="ph codeph">b.cu</samp> above only contains CUDA device code,
                              so one might think that the b.o object doesn't need to be passed to the
                              host linker.
                              But actually there is implicit host code generated whenever a device
                              symbol can be accessed from the host side, either via a launch or an
                              API call like <samp class="ph codeph">cudaGetSymbolAddress()</samp>.
                              This implicit host code is put into <samp class="ph codeph">b.o</samp>, and needs to
                              be passed to the host linker.
                              In addition, for JIT linking to work, all device code must be passed
                              to the host linker; otherwise the host executable will not contain the
                              device code needed for the JIT link.
                              So a general rule is that the device linker and host linker must see the
                              same host object files (if the object files have any device references
                              in them—if a file is pure host then the device linker doesn't need to
                              see it).
                              If an object file containing device code is not passed to the host
                              linker, then you will see an error message about the function
                              <samp class="ph codeph">__cudaRegisterLinkedBinary_<em class="ph i">name</em></samp> calling an
                              undefined or unresolved symbol
                              <samp class="ph codeph">__fatbinwrap_<em class="ph i">name</em></samp>.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="cuda-arch"><a name="cuda-arch" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#cuda-arch" name="cuda-arch" shape="rect">6.5.4.&nbsp;Using __CUDA_ARCH__</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              In separate compilation, <samp class="ph codeph">__CUDA_ARCH__</samp> must not be used
                              in headers in a way that lets different objects contain different behavior.
                              Alternatively, it must be guaranteed that all objects are compiled for the same
                              compute_arch.
                              If a weak function or template function is defined in a header and its
                              behavior depends on <samp class="ph codeph">__CUDA_ARCH__</samp>, then the instances
                              of that function in the objects could conflict if the objects are
                              compiled for different compute architectures.
                              For example, if an a.h contains:
                              
                           </p><pre xml:space="preserve">template&lt;typename T&gt;
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__device__</span> T* getptr(<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">void</span>)
{
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#if __CUDA_ARCH__ == 500</span>
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">return</span> NULL; <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-comment">/* no address */</span>
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#else</span>
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-attribute">__shared__</span> T arr[256];
  <span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-keyword">return</span> arr;
<span xmlns:xslthl="http://xslthl.sf.net" class="xslthl-directive">#endif</span>
}</pre><p class="p">
                              Suppose a.cu and b.cu both include a.h and instantiate
                              <samp class="ph codeph">getptr</samp> for the same type, b.cu expects a non-NULL
                              address, and the files are compiled with:
                              
                           </p><pre class="pre screen" xml:space="preserve">nvcc --gpu-architecture=compute_50 --device-c a.cu
nvcc --gpu-architecture=compute_52 --device-c b.cu
nvcc --gpu-architecture=sm_52 a.o b.o</pre><p class="p">
                              At link time, only one version of getptr is used, so the behavior
                              would depend on which version is picked.
                              To avoid this, either a.cu and b.cu must be compiled for the same
                              compute arch, or <samp class="ph codeph">__CUDA_ARCH__</samp> should not be used in
                              the shared header function.
                              
                           </p>
                        </div>
                     </div>
                     <div class="topic concept nested2" id="library-device-code"><a name="library-device-code" shape="rect">
                           <!-- --></a><h3 class="title topictitle2"><a href="#library-device-code" name="library-device-code" shape="rect">6.5.5.&nbsp;Device Code in Libraries</a></h3>
                        <div class="body conbody">
                           <p class="p">
                              If a device function with non-weak external linkage is defined in a library 
                              as well as a non-library object (or another library), 
                              the device linker will complain about the multiple definitions
                              (this differs from traditional host linkers that may ignore 
                              the function definition from the library
                              object, if it was already found in an earlier object).
                              
                           </p>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="miscellaneous-nvcc-usage"><a name="miscellaneous-nvcc-usage" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#miscellaneous-nvcc-usage" name="miscellaneous-nvcc-usage" shape="rect">7.&nbsp;Miscellaneous NVCC Usage</a></h2>
                  <div class="topic concept nested1" id="cross-compilation"><a name="cross-compilation" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#cross-compilation" name="cross-compilation" shape="rect">7.1.&nbsp;Cross Compilation</a></h3>
                     <div class="body conbody">
                        <div class="p">
                           Cross compilation is controlled by using the following
                           <samp class="ph codeph">nvcc</samp> command line options:
                           
                           <ul class="ul">
                              <li class="li"><samp class="ph codeph"><a class="xref" href="index.html#file-and-path-specifications" shape="rect">--compiler-bindir</a></samp>
                                 is used for cross compilation, where the underlying host compiler is
                                 capable of generating objects for the target platform.
                                 
                              </li>
                              <li class="li"><samp class="ph codeph"><a class="xref" href="index.html#options-for-altering-compiler-linker-behavior" shape="rect">--machine</a>=32</samp>.
                                 This option signals that the target platform is a 32-bit platform.
                                 Use this when the host platform is a 64-bit platform.
                                 
                              </li>
                           </ul>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="keeping-intermediate-phase-files"><a name="keeping-intermediate-phase-files" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#keeping-intermediate-phase-files" name="keeping-intermediate-phase-files" shape="rect">7.2.&nbsp;Keeping Intermediate Phase Files</a></h3>
                     <div class="body conbody">
                        <p class="p"><samp class="ph codeph">nvcc</samp> stores intermediate results by default into
                           temporary files that are deleted immediately before it completes.
                           The locations of the temporary file directories used are, depending on
                           the current platform, as follows:
                           
                        </p>
                        <dl class="dl">
                           <dt class="dt dlterm">Windows</dt>
                           <dd class="dd">
                              Value of environment variable <samp class="ph codeph">TEMP</samp> is used.
                              If it is not set, <samp class="ph codeph">C:\Windows\temp</samp> is used instead.
                              
                           </dd>
                           <dt class="dt dlterm">Other Platforms</dt>
                           <dd class="dd">
                              Value of environment variable <samp class="ph codeph">TMPDIR</samp> is used.
                              If it is not set, <samp class="ph codeph">/tmp</samp> is used instead.
                              
                           </dd>
                        </dl>
                        <p class="p">
                           Option
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--keep</a></samp>
                           makes <samp class="ph codeph">nvcc</samp> store these intermediate files in the
                           current directory or in the directory specified by
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--keep-dir</a></samp>
                           instead, with names as described in <a class="xref" href="index.html#supported-phases" shape="rect">Supported Phases</a>.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="cleaning-generated-files"><a name="cleaning-generated-files" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#cleaning-generated-files" name="cleaning-generated-files" shape="rect">7.3.&nbsp;Cleaning Up Generated Files</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           All files generated by a particular <samp class="ph codeph">nvcc</samp> command can be
                           cleaned up by repeating the command, but with additional option
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--clean-targets</a></samp>.
                           This option is particularly useful after using
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--keep</a></samp>,
                           because the
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--keep</a></samp>
                           option usually leaves a large number of intermediate files around.
                           
                        </p>
                        <p class="p">
                           Because using
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--clean-targets</a></samp>
                           will remove exactly what the original <samp class="ph codeph">nvcc</samp> command
                           created, it is important to exactly repeat all of the options in the
                           original command.
                           For instance, in the following example, omitting
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-guiding-compiler-driver" shape="rect">--keep</a></samp>,
                           or adding
                           <samp class="ph codeph"><a class="xref" href="index.html#options-for-specifying-compilation-phase" shape="rect">--compile</a></samp>
                           will have different cleanup effects.
                           
                        </p><pre class="pre screen" xml:space="preserve">nvcc acos.cu --keep
nvcc acos.cu --keep --clean-targets</pre></div>
                  </div>
                  <div class="topic concept nested1" id="printing-code-generation-statistics"><a name="printing-code-generation-statistics" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#printing-code-generation-statistics" name="printing-code-generation-statistics" shape="rect">7.4.&nbsp;Printing Code Generation Statistics</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           A summary of the number of registers used and the amount of memory
                           needed per compiled device function can be printed by passing option
                           <samp class="ph codeph"><a class="xref" href="index.html#generic-tool-options" shape="rect">--resource-usage</a></samp>
                           to <samp class="ph codeph">nvcc</samp>:
                           
                        </p><pre class="pre screen" xml:space="preserve">$ nvcc --resource-usage acos.cu
ptxas info    : 1536 bytes gmem, 8 bytes cmem[14]
ptxas info    : Compiling entry function 'acos_main' for 'sm_30'
ptxas info    : Function properties for acos_main
    0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info    : Used 6 registers, 1536 bytes smem, 32 bytes cmem[0]</pre><p class="p">As shown in the above example, the amounts of statically allocated global memory (gmem) and
                           constant memory in bank 14 (cmem) are listed.
                        </p>
                        <p class="p">Global memory and some of the constant banks are module-scoped resources, not per-kernel
                           resources.  Allocation of constant variables to constant banks is profile-specific.
                        </p>
                        <p class="p">Following this, per-kernel resource information is printed.</p>
                        <p class="p">The stack frame is the per-thread stack usage of this function.  Spill stores and loads
                           represent stores and loads performed on stack memory, which is used for storing variables
                           that couldn't be allocated to physical registers.
                        </p>
                        <p class="p">Similarly, the number of registers, the amount of shared memory, and the total space
                           allocated in the constant bank are shown.
                        </p>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright-past-to-present" name="copyright-past-to-present" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2007</span>-<span class="ph">2019</span> NVIDIA
                              Corporation. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script><script type="text/javascript">_satellite.pageBottom();</script></body>
</html>