Sophie

Sophie

distrib > Fedora > 20 > x86_64 > by-pkgid > d9f573299e87e886807be879704f0b6e > files > 140

julia-doc-0.3.4-1.fc20.noarch.rpm




<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Performance Tips &mdash; Julia Language 0.3.4 documentation</title>

  <!-- Google Fonts stylesheet for Lato, Roboto Slab and Inconsolata. -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700">

  <!-- Site stylesheet. Linked exactly once: a second identical reference adds nothing. -->
  <link rel="stylesheet" href="../_static/julia.css">

  <!-- Document relations: documentation root, then the next and previous chapters. -->
  <link rel="top" title="Julia Language 0.3.4 documentation" href="../index.html">
  <link rel="next" title="Style Guide" href="style-guide.html">
  <link rel="prev" title="Packages" href="packages.html">

  <!-- Modernizr 2.6.2, loaded from the cdnjs CDN. -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/modernizr/2.6.2/modernizr.min.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-nav-search">
        <!-- Logo link to the project site. The alt text names the link
             destination because the image is the link's only content. -->
        <a href="https://julialang.org/"><img src="../_static/julia-logo.svg" class="logo" alt="Julia language website"></a>
        <!-- Disabled text link to the documentation index:
        <a href="../index.html" class="fa fa-home"> Julia Language</a>
        -->
        <div role="search">
          <!-- GET form that submits the query to ../search.html. -->
          <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
            <!-- aria-label supplies the accessible name; the placeholder alone is not a label. -->
            <input type="text" name="q" placeholder="Search docs" aria-label="Search docs">
            <input type="hidden" name="check_keywords" value="yes">
            <input type="hidden" name="area" value="default">
          </form>
        </div>
      </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
        
        
            <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="getting-started.html">Getting Started</a><ul>
<li class="toctree-l2"><a class="reference internal" href="getting-started.html#resources">Resources</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="variables.html">Variables</a><ul>
<li class="toctree-l2"><a class="reference internal" href="variables.html#allowed-variable-names">Allowed Variable Names</a></li>
<li class="toctree-l2"><a class="reference internal" href="variables.html#stylistic-conventions">Stylistic Conventions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="integers-and-floating-point-numbers.html">Integers and Floating-Point Numbers</a><ul>
<li class="toctree-l2"><a class="reference internal" href="integers-and-floating-point-numbers.html#integers">Integers</a></li>
<li class="toctree-l2"><a class="reference internal" href="integers-and-floating-point-numbers.html#floating-point-numbers">Floating-Point Numbers</a></li>
<li class="toctree-l2"><a class="reference internal" href="integers-and-floating-point-numbers.html#arbitrary-precision-arithmetic">Arbitrary Precision Arithmetic</a></li>
<li class="toctree-l2"><a class="reference internal" href="integers-and-floating-point-numbers.html#numeric-literal-coefficients">Numeric Literal Coefficients</a></li>
<li class="toctree-l2"><a class="reference internal" href="integers-and-floating-point-numbers.html#literal-zero-and-one">Literal zero and one</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mathematical-operations.html">Mathematical Operations and Elementary Functions</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mathematical-operations.html#arithmetic-operators">Arithmetic Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="mathematical-operations.html#bitwise-operators">Bitwise Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="mathematical-operations.html#updating-operators">Updating operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="mathematical-operations.html#numeric-comparisons">Numeric Comparisons</a></li>
<li class="toctree-l2"><a class="reference internal" href="mathematical-operations.html#elementary-functions">Elementary Functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="complex-and-rational-numbers.html">Complex and Rational Numbers</a><ul>
<li class="toctree-l2"><a class="reference internal" href="complex-and-rational-numbers.html#complex-numbers">Complex Numbers</a></li>
<li class="toctree-l2"><a class="reference internal" href="complex-and-rational-numbers.html#rational-numbers">Rational Numbers</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="strings.html">Strings</a><ul>
<li class="toctree-l2"><a class="reference internal" href="strings.html#characters">Characters</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#string-basics">String Basics</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#unicode-and-utf-8">Unicode and UTF-8</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#interpolation">Interpolation</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#common-operations">Common Operations</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#non-standard-string-literals">Non-Standard String Literals</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#regular-expressions">Regular Expressions</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#id3">Byte Array Literals</a></li>
<li class="toctree-l2"><a class="reference internal" href="strings.html#version-number-literals">Version Number Literals</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="functions.html">Functions</a><ul>
<li class="toctree-l2"><a class="reference internal" href="functions.html#argument-passing-behavior">Argument Passing Behavior</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#the-return-keyword">The <tt class="docutils literal"><span class="pre">return</span></tt> Keyword</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#id1">Operators Are Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#operators-with-special-names">Operators With Special Names</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#anonymous-functions">Anonymous Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#multiple-return-values">Multiple Return Values</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#varargs-functions">Varargs Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#optional-arguments">Optional Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#keyword-arguments">Keyword Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#evaluation-scope-of-default-values">Evaluation Scope of Default Values</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#block-syntax-for-function-arguments">Block Syntax for Function Arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="functions.html#further-reading">Further Reading</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="control-flow.html">Control Flow</a><ul>
<li class="toctree-l2"><a class="reference internal" href="control-flow.html#compound-expressions">Compound Expressions</a></li>
<li class="toctree-l2"><a class="reference internal" href="control-flow.html#conditional-evaluation">Conditional Evaluation</a></li>
<li class="toctree-l2"><a class="reference internal" href="control-flow.html#short-circuit-evaluation">Short-Circuit Evaluation</a></li>
<li class="toctree-l2"><a class="reference internal" href="control-flow.html#repeated-evaluation-loops">Repeated Evaluation: Loops</a></li>
<li class="toctree-l2"><a class="reference internal" href="control-flow.html#exception-handling">Exception Handling</a></li>
<li class="toctree-l2"><a class="reference internal" href="control-flow.html#tasks-aka-coroutines">Tasks (aka Coroutines)</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="variables-and-scoping.html">Scope of Variables</a><ul>
<li class="toctree-l2"><a class="reference internal" href="variables-and-scoping.html#for-loops-and-comprehensions">For Loops and Comprehensions</a></li>
<li class="toctree-l2"><a class="reference internal" href="variables-and-scoping.html#constants">Constants</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="types.html">Types</a><ul>
<li class="toctree-l2"><a class="reference internal" href="types.html#type-declarations">Type Declarations</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#abstract-types">Abstract Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#bits-types">Bits Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#composite-types">Composite Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#immutable-composite-types">Immutable Composite Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#declared-types">Declared Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#tuple-types">Tuple Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#type-unions">Type Unions</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#man-parametric-types">Parametric Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#type-aliases">Type Aliases</a></li>
<li class="toctree-l2"><a class="reference internal" href="types.html#operations-on-types">Operations on Types</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="methods.html">Methods</a><ul>
<li class="toctree-l2"><a class="reference internal" href="methods.html#defining-methods">Defining Methods</a></li>
<li class="toctree-l2"><a class="reference internal" href="methods.html#method-ambiguities">Method Ambiguities</a></li>
<li class="toctree-l2"><a class="reference internal" href="methods.html#parametric-methods">Parametric Methods</a></li>
<li class="toctree-l2"><a class="reference internal" href="methods.html#note-on-optional-and-keyword-arguments">Note on Optional and keyword Arguments</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="constructors.html">Constructors</a><ul>
<li class="toctree-l2"><a class="reference internal" href="constructors.html#outer-constructor-methods">Outer Constructor Methods</a></li>
<li class="toctree-l2"><a class="reference internal" href="constructors.html#inner-constructor-methods">Inner Constructor Methods</a></li>
<li class="toctree-l2"><a class="reference internal" href="constructors.html#incomplete-initialization">Incomplete Initialization</a></li>
<li class="toctree-l2"><a class="reference internal" href="constructors.html#parametric-constructors">Parametric Constructors</a></li>
<li class="toctree-l2"><a class="reference internal" href="constructors.html#case-study-rational">Case Study: Rational</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="conversion-and-promotion.html">Conversion and Promotion</a><ul>
<li class="toctree-l2"><a class="reference internal" href="conversion-and-promotion.html#conversion">Conversion</a></li>
<li class="toctree-l2"><a class="reference internal" href="conversion-and-promotion.html#promotion">Promotion</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules.html">Modules</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules.html#summary-of-module-usage">Summary of module usage</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="metaprogramming.html">Metaprogramming</a><ul>
<li class="toctree-l2"><a class="reference internal" href="metaprogramming.html#expressions-and-eval">Expressions and Eval</a></li>
<li class="toctree-l2"><a class="reference internal" href="metaprogramming.html#macros">Macros</a></li>
<li class="toctree-l2"><a class="reference internal" href="metaprogramming.html#reflection">Reflection</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="arrays.html">Multi-dimensional Arrays</a><ul>
<li class="toctree-l2"><a class="reference internal" href="arrays.html#arrays">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="arrays.html#sparse-matrices">Sparse Matrices</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="linear-algebra.html">Linear algebra</a><ul>
<li class="toctree-l2"><a class="reference internal" href="linear-algebra.html#matrix-factorizations">Matrix factorizations</a></li>
<li class="toctree-l2"><a class="reference internal" href="linear-algebra.html#special-matrices">Special matrices</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="networking-and-streams.html">Networking and Streams</a><ul>
<li class="toctree-l2"><a class="reference internal" href="networking-and-streams.html#basic-stream-i-o">Basic Stream I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="networking-and-streams.html#text-i-o">Text I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="networking-and-streams.html#working-with-files">Working with Files</a></li>
<li class="toctree-l2"><a class="reference internal" href="networking-and-streams.html#a-simple-tcp-example">A simple TCP example</a></li>
<li class="toctree-l2"><a class="reference internal" href="networking-and-streams.html#resolving-ip-addresses">Resolving IP Addresses</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="parallel-computing.html">Parallel Computing</a><ul>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#data-movement">Data Movement</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#parallel-map-and-loops">Parallel Map and Loops</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#synchronization-with-remote-references">Synchronization With Remote References</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#scheduling">Scheduling</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#distributed-arrays">Distributed Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#constructing-distributed-arrays">Constructing Distributed Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#distributed-array-operations">Distributed Array Operations</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#shared-arrays-experimental">Shared Arrays (Experimental)</a></li>
<li class="toctree-l2"><a class="reference internal" href="parallel-computing.html#clustermanagers">ClusterManagers</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="running-external-programs.html">Running External Programs</a><ul>
<li class="toctree-l2"><a class="reference internal" href="running-external-programs.html#interpolation">Interpolation</a></li>
<li class="toctree-l2"><a class="reference internal" href="running-external-programs.html#quoting">Quoting</a></li>
<li class="toctree-l2"><a class="reference internal" href="running-external-programs.html#pipelines">Pipelines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="calling-c-and-fortran-code.html">Calling C and Fortran Code</a><ul>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#mapping-c-types-to-julia">Mapping C Types to Julia</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#accessing-data-through-a-pointer">Accessing Data through a Pointer</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#passing-pointers-for-modifying-inputs">Passing Pointers for Modifying Inputs</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#garbage-collection-safety">Garbage Collection Safety</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#non-constant-function-specifications">Non-constant Function Specifications</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#indirect-calls">Indirect Calls</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#calling-convention">Calling Convention</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#accessing-global-variables">Accessing Global Variables</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#passing-julia-callback-functions-to-c">Passing Julia Callback Functions to C</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#c">C++</a></li>
<li class="toctree-l2"><a class="reference internal" href="calling-c-and-fortran-code.html#handling-platform-variations">Handling Platform Variations</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="interacting-with-julia.html">Interacting With Julia</a><ul>
<li class="toctree-l2"><a class="reference internal" href="interacting-with-julia.html#the-different-prompt-modes">The different prompt modes</a></li>
<li class="toctree-l2"><a class="reference internal" href="interacting-with-julia.html#key-bindings">Key bindings</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="embedding.html">Embedding Julia</a><ul>
<li class="toctree-l2"><a class="reference internal" href="embedding.html#high-level-embedding">High-Level Embedding</a></li>
<li class="toctree-l2"><a class="reference internal" href="embedding.html#converting-types">Converting Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="embedding.html#calling-julia-functions">Calling Julia Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="embedding.html#memory-management">Memory Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="embedding.html#working-with-arrays">Working with Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="embedding.html#exceptions">Exceptions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="packages.html">Packages</a><ul>
<li class="toctree-l2"><a class="reference internal" href="packages.html#package-status">Package Status</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#adding-and-removing-packages">Adding and Removing Packages</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#installing-unregistered-packages">Installing Unregistered Packages</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#updating-packages">Updating Packages</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#checkout-pin-and-free">Checkout, Pin and Free</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="packages.html#package-development">Package Development</a><ul>
<li class="toctree-l2"><a class="reference internal" href="packages.html#initial-setup">Initial Setup</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#generating-a-new-package">Generating a New Package</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#making-your-package-available">Making Your Package Available</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#publishing-your-package">Publishing Your Package</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#tagging-package-versions">Tagging Package Versions</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#fixing-package-requirements">Fixing Package Requirements</a></li>
<li class="toctree-l2"><a class="reference internal" href="packages.html#man-package-requirements">Requirements Specification</a></li>
</ul>
</li>
<!-- Entry for the page being viewed. aria-current exposes the "current" state
     to assistive technology; the sub-items are in-page fragment links. -->
<li class="toctree-l1 current"><a class="current reference internal" href="" aria-current="page">Performance Tips</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#avoid-global-variables">Avoid global variables</a></li>
<li class="toctree-l2"><a class="reference internal" href="#measure-performance-with-time-and-pay-attention-to-memory-allocation">Measure performance with <tt class="docutils literal"><span class="pre">&#64;time</span></tt> and pay attention to memory allocation</a></li>
<li class="toctree-l2"><a class="reference internal" href="#tools">Tools</a></li>
<li class="toctree-l2"><a class="reference internal" href="#avoid-containers-with-abstract-type-parameters">Avoid containers with abstract type parameters</a></li>
<li class="toctree-l2"><a class="reference internal" href="#type-declarations">Type declarations</a></li>
<li class="toctree-l2"><a class="reference internal" href="#break-functions-into-multiple-definitions">Break functions into multiple definitions</a></li>
<li class="toctree-l2"><a class="reference internal" href="#write-type-stable-functions">Write &#8220;type-stable&#8221; functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="#avoid-changing-the-type-of-a-variable">Avoid changing the type of a variable</a></li>
<li class="toctree-l2"><a class="reference internal" href="#separate-kernel-functions">Separate kernel functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="#access-arrays-in-memory-order-along-columns">Access arrays in memory order, along columns</a></li>
<li class="toctree-l2"><a class="reference internal" href="#pre-allocating-outputs">Pre-allocating outputs</a></li>
<li class="toctree-l2"><a class="reference internal" href="#avoid-string-interpolation-for-i-o">Avoid string interpolation for I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="#fix-deprecation-warnings">Fix deprecation warnings</a></li>
<li class="toctree-l2"><a class="reference internal" href="#tweaks">Tweaks</a></li>
<li class="toctree-l2"><a class="reference internal" href="#performance-annotations">Performance Annotations</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="style-guide.html">Style Guide</a><ul>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#write-functions-not-just-scripts">Write functions, not just scripts</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#avoid-writing-overly-specific-types">Avoid writing overly-specific types</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#handle-excess-argument-diversity-in-the-caller">Handle excess argument diversity in the caller</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#append-to-names-of-functions-that-modify-their-arguments">Append <cite>!</cite> to names of functions that modify their arguments</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#avoid-strange-type-unions">Avoid strange type Unions</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#try-to-avoid-nullable-fields">Try to avoid nullable fields</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#avoid-elaborate-container-types">Avoid elaborate container types</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#avoid-underscores-in-names">Avoid underscores in names</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-overuse-try-catch">Don&#8217;t overuse try-catch</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-parenthesize-conditions">Don&#8217;t parenthesize conditions</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-overuse">Don&#8217;t overuse ...</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-use-unnecessary-static-parameters">Don&#8217;t use unnecessary static parameters</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#avoid-confusion-about-whether-something-is-an-instance-or-a-type">Avoid confusion about whether something is an instance or a type</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-overuse-macros">Don&#8217;t overuse macros</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-expose-unsafe-operations-at-the-interface-level">Don&#8217;t expose unsafe operations at the interface level</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#don-t-overload-methods-of-base-container-types">Don&#8217;t overload methods of base container types</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#be-careful-with-type-equality">Be careful with type equality</a></li>
<li class="toctree-l2"><a class="reference internal" href="style-guide.html#do-not-write-x-f-x">Do not write <tt class="docutils literal"><span class="pre">x-&gt;f(x)</span></tt></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a><ul>
<li class="toctree-l2"><a class="reference internal" href="faq.html#sessions-and-the-repl">Sessions and the REPL</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#functions">Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#types-type-declarations-and-constructors">Types, type declarations, and constructors</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#nothingness-and-missing-values">Nothingness and missing values</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#julia-releases">Julia Releases</a></li>
<li class="toctree-l2"><a class="reference internal" href="faq.html#developing-julia">Developing Julia</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="noteworthy-differences.html">Noteworthy Differences from other Languages</a><ul>
<li class="toctree-l2"><a class="reference internal" href="noteworthy-differences.html#noteworthy-differences-from-matlab">Noteworthy differences from MATLAB</a></li>
<li class="toctree-l2"><a class="reference internal" href="noteworthy-differences.html#noteworthy-differences-from-r">Noteworthy differences from R</a></li>
<li class="toctree-l2"><a class="reference internal" href="noteworthy-differences.html#noteworthy-differences-from-python">Noteworthy differences from Python</a></li>
</ul>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/base.html">The Standard Library</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#introduction">Introduction</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#getting-around">Getting Around</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#all-objects">All Objects</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#types">Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#generic-functions">Generic Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#syntax">Syntax</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#iteration">Iteration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#general-collections">General Collections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#iterable-collections">Iterable Collections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#indexable-collections">Indexable Collections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#associative-collections">Associative Collections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#set-like-collections">Set-Like Collections</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#dequeues">Dequeues</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#strings">Strings</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#i-o">I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#network-i-o">Network I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#text-i-o">Text I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#multimedia-i-o">Multimedia I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#memory-mapped-i-o">Memory-mapped I/O</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#standard-numeric-types">Standard Numeric Types</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#mathematical-operators">Mathematical Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#mathematical-functions">Mathematical Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#data-formats">Data Formats</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#numbers">Numbers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#bigfloats">BigFloats</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#random-numbers">Random Numbers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#arrays">Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#combinatorics">Combinatorics</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#statistics">Statistics</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#signal-processing">Signal Processing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#numerical-integration">Numerical Integration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#parallel-computing">Parallel Computing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#distributed-arrays">Distributed Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#shared-arrays-experimental-unix-only-feature">Shared Arrays (Experimental, UNIX-only feature)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#system">System</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#c-interface">C Interface</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#errors">Errors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#tasks">Tasks</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#events">Events</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#reflection">Reflection</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/base.html#internals">Internals</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/sparse.html">Sparse Matrices</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/linalg.html">Linear Algebra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/linalg.html#module-Base.LinAlg.BLAS">BLAS Functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/constants.html">Constants</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/file.html">Filesystem</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/punctuation.html">Punctuation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/sort.html">Sorting and Related Functions</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/sort.html#sorting-functions">Sorting Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/sort.html#order-related-functions">Order-Related Functions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/sort.html#sorting-algorithms">Sorting Algorithms</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/pkg.html">Package Manager Functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/collections.html">Collections and Data Structures</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/collections.html#priorityqueue">PriorityQueue</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/collections.html#heap-functions">Heap Functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/graphics.html">Graphics</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/graphics.html#geometry">Geometry</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/test.html">Unit and Functional Testing</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/test.html#overview">Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/test.html#handlers">Handlers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/test.html#macros">Macros</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/test.html#functions">Functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/test.html#testing-base-julia">Testing Base Julia</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stdlib/profile.html">Profiling</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/profile.html#basic-usage">Basic usage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/profile.html#accumulation-and-clearing">Accumulation and clearing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/profile.html#options-for-controlling-the-display-of-profile-results">Options for controlling the display of profile results</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/profile.html#configuration">Configuration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../stdlib/profile.html#function-reference">Function reference</a></li>
</ul>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../devdocs/julia.html">Documentation of Julia&#8217;s Internals</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../devdocs/cartesian.html">Base.Cartesian</a></li>
<li class="toctree-l2"><a class="reference internal" href="../devdocs/sysimg.html">System Image Building</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../devdocs/C.html">Developing/debugging Julia&#8217;s C code</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../devdocs/backtraces.html">Reporting and analyzing crashes (segfaults)</a></li>
<li class="toctree-l2"><a class="reference internal" href="../devdocs/debuggingtips.html">gdb debugging tips</a></li>
</ul>
</li>
</ul>

        
      </div>
      &nbsp;
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="../index.html">Julia Language</a>
      </nav>


      
      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="../index.html">Docs</a> &raquo;</li>
      
    <li>Performance Tips</li>
      <li class="wy-breadcrumbs-aside">
        
          <a href="../_sources/manual/performance-tips.txt" rel="nofollow"> View page source</a>
        
      </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document">
            
  <div class="section" id="performance-tips">
<span id="man-performance-tips"></span><h1>Performance Tips<a class="headerlink" href="#performance-tips" title="Permalink to this headline">¶</a></h1>
<p>In the following sections, we briefly go through a few techniques that
can help make your Julia code run as fast as possible.</p>
<div class="section" id="avoid-global-variables">
<h2>Avoid global variables<a class="headerlink" href="#avoid-global-variables" title="Permalink to this headline">¶</a></h2>
<p>A global variable might have its value, and therefore its type, change
at any point. This makes it difficult for the compiler to optimize code
using global variables. Variables should be local, or passed as
arguments to functions, whenever possible.</p>
<p>Any code that is performance-critical or being benchmarked should be
inside a function.</p>
<p>We find that global names are frequently constants, and declaring them
as such greatly improves performance:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="kd">const</span> <span class="n">DEFAULT_VAL</span> <span class="o">=</span> <span class="mi">0</span>
</pre></div>
</div>
<p>Uses of non-constant globals can be optimized by annotating their types
at the point of use:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="kd">global</span> <span class="n">x</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">::</span><span class="kt">Int</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<p>Writing functions is better style. It leads to more reusable code and
clarifies what steps are being done, and what their inputs and outputs
are.</p>
</div>
<div class="section" id="measure-performance-with-time-and-pay-attention-to-memory-allocation">
<h2>Measure performance with <tt class="docutils literal"><span class="pre">&#64;time</span></tt> and pay attention to memory allocation<a class="headerlink" href="#measure-performance-with-time-and-pay-attention-to-memory-allocation" title="Permalink to this headline">¶</a></h2>
<p>The most useful tool for measuring performance is the <tt class="docutils literal"><span class="pre">&#64;time</span></tt> macro.
The following example illustrates good working style:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">julia</span><span class="o">&gt;</span> <span class="k">function</span><span class="nf"> f</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
           <span class="n">s</span> <span class="o">=</span> <span class="mi">0</span>
           <span class="k">for</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">:</span><span class="n">n</span>
               <span class="n">s</span> <span class="o">+=</span> <span class="n">i</span><span class="o">/</span><span class="mi">2</span>
           <span class="k">end</span>
           <span class="n">s</span>
       <span class="k">end</span>
<span class="n">f</span> <span class="p">(</span><span class="n">generic</span> <span class="k">function</span><span class="nf"> with</span> <span class="mi">1</span> <span class="n">method</span><span class="p">)</span>

<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">f</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="n">elapsed</span> <span class="n">time</span><span class="p">:</span> <span class="mf">0.008217942</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">93784</span> <span class="n">bytes</span> <span class="n">allocated</span><span class="p">)</span>
<span class="mf">0.5</span>

<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">f</span><span class="p">(</span><span class="mi">10</span><span class="o">^</span><span class="mi">6</span><span class="p">)</span>
<span class="n">elapsed</span> <span class="n">time</span><span class="p">:</span> <span class="mf">0.063418472</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">32002136</span> <span class="n">bytes</span> <span class="n">allocated</span><span class="p">)</span>
<span class="mf">2.5000025e11</span>
</pre></div>
</div>
<p>On the first call (<tt class="docutils literal"><span class="pre">&#64;time</span> <span class="pre">f(1)</span></tt>), <tt class="docutils literal"><span class="pre">f</span></tt> gets compiled.  (If you&#8217;ve
not yet used <tt class="docutils literal"><span class="pre">&#64;time</span></tt> in this session, it will also compile functions
needed for timing.)  You should not take the results of this run
seriously. For the second run, note that in addition to reporting the
time, it also indicated that a large amount of memory was allocated.
This is the single biggest advantage of <tt class="docutils literal"><span class="pre">&#64;time</span></tt> vs. functions like
<tt class="docutils literal"><span class="pre">tic</span></tt> and <tt class="docutils literal"><span class="pre">toc</span></tt>, which only report time.</p>
<p>Unexpected memory allocation is almost always a sign of some problem
with your code, usually a problem with type-stability. Consequently,
in addition to the allocation itself, it&#8217;s very likely that the code
generated for your function is far from optimal. Take such indications
seriously and follow the advice below.</p>
<p>As a teaser, note that an improved version of this function allocates
no memory (except to pass the result back to the REPL) and has
thirty-fold faster execution:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">f_improved</span><span class="p">(</span><span class="mi">10</span><span class="o">^</span><span class="mi">6</span><span class="p">)</span>
<span class="n">elapsed</span> <span class="n">time</span><span class="p">:</span> <span class="mf">0.00253829</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">112</span> <span class="n">bytes</span> <span class="n">allocated</span><span class="p">)</span>
<span class="mf">2.5000025e11</span>
</pre></div>
</div>
<p>Below you&#8217;ll learn how to spot the problem with <tt class="docutils literal"><span class="pre">f</span></tt> and how to fix it.</p>
<p>In some situations, your function may need to allocate memory as part
of its operation, and this can complicate the simple picture above. In
such cases, consider using one of the <a class="reference internal" href="#man-performance-tools"><em>tools</em></a> below to diagnose problems, or write a
version of your function that separates allocation from its
algorithmic aspects (see <a class="reference internal" href="#man-preallocation"><em>Pre-allocating outputs</em></a>).</p>
</div>
<div class="section" id="tools">
<span id="man-performance-tools"></span><h2>Tools<a class="headerlink" href="#tools" title="Permalink to this headline">¶</a></h2>
<p>Julia and its package ecosystem include tools that may help you
diagnose problems and improve the performance of your code:</p>
<ul class="simple">
<li><a class="reference internal" href="../stdlib/profile.html#stdlib-profiling"><em>Profiling</em></a> allows you to measure the performance of
your running code and identify lines that serve as bottlenecks.  For
complex projects, the <a class="reference external" href="https://github.com/timholy/ProfileView.jl">ProfileView</a> package can help you
visualize your profiling results.</li>
<li>Unexpectedly large memory allocations&#8212;as reported by <tt class="docutils literal"><span class="pre">&#64;time</span></tt>,
<tt class="docutils literal"><span class="pre">&#64;allocated</span></tt>, or the profiler (through calls to the
garbage-collection routines)&#8212;hint that there might be issues with
your code.  If you don&#8217;t see another reason for the allocations,
suspect a type problem.  You can also start julia with the
<tt class="docutils literal"><span class="pre">--track-allocation=user</span></tt> option and examine the resulting
<tt class="docutils literal"><span class="pre">*.mem</span></tt> files to see information about where those allocations
occur.</li>
<li>The <a class="reference external" href="https://github.com/astrieanna/TypeCheck.jl">TypeCheck</a>
package can help identify certain kinds of type problems. A more
laborious but comprehensive tool is <tt class="docutils literal"><span class="pre">code_typed</span></tt>.  Look
particularly for variables that have type <tt class="docutils literal"><span class="pre">Any</span></tt> (in the header) or
statements declared as <tt class="docutils literal"><span class="pre">Union</span></tt> types.  Such problems can usually
be fixed using the tips below.</li>
<li>The <a class="reference external" href="https://github.com/tonyhffong/Lint.jl">Lint</a> package can also
warn you of certain types of programming errors.</li>
</ul>
</div>
<div class="section" id="avoid-containers-with-abstract-type-parameters">
<h2>Avoid containers with abstract type parameters<a class="headerlink" href="#avoid-containers-with-abstract-type-parameters" title="Permalink to this headline">¶</a></h2>
<p>When working with parameterized types, including arrays, it is best to
avoid parameterizing with abstract types where possible.</p>
<p>Consider the following:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">a</span> <span class="o">=</span> <span class="n">Real</span><span class="p">[]</span>    <span class="c"># typeof(a) = Array{Real,1}</span>
<span class="k">if</span> <span class="p">(</span><span class="n">f</span> <span class="o">=</span> <span class="n">rand</span><span class="p">())</span> <span class="o">&lt;</span> <span class="o">.</span><span class="mi">8</span>
    <span class="n">push</span><span class="o">!</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">f</span><span class="p">)</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Because <tt class="docutils literal"><span class="pre">a</span></tt> is an array of abstract type <tt class="docutils literal"><span class="pre">Real</span></tt>, it must be able
to hold any Real value.  Since <tt class="docutils literal"><span class="pre">Real</span></tt> objects can be of arbitrary
size and structure, <tt class="docutils literal"><span class="pre">a</span></tt> must be represented as an array of pointers to
individually allocated <tt class="docutils literal"><span class="pre">Real</span></tt> objects.  Because <tt class="docutils literal"><span class="pre">f</span></tt> will always be
a <tt class="docutils literal"><span class="pre">Float64</span></tt>, we should instead use:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">a</span> <span class="o">=</span> <span class="kt">Float64</span><span class="p">[]</span> <span class="c"># typeof(a) = Array{Float64,1}</span>
</pre></div>
</div>
<p>which will create a contiguous block of 64-bit floating-point values
that can be manipulated efficiently.</p>
<p>See also the discussion under <a class="reference internal" href="types.html#man-parametric-types"><em>Parametric Types</em></a>.</p>
</div>
<div class="section" id="type-declarations">
<h2>Type declarations<a class="headerlink" href="#type-declarations" title="Permalink to this headline">¶</a></h2>
<p>In many languages with optional type declarations, adding declarations
is the principal way to make code run faster. This is <em>not</em> the case
in Julia. In Julia, the compiler generally knows the types of all function
arguments, local variables, and expressions.
However, there are a few specific instances where declarations are
helpful.</p>
<div class="section" id="declare-specific-types-for-fields-of-composite-types">
<h3>Declare specific types for fields of composite types<a class="headerlink" href="#declare-specific-types-for-fields-of-composite-types" title="Permalink to this headline">¶</a></h3>
<p>Given a user-defined type like the following:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">type</span><span class="nc"> Foo</span>
    <span class="n">field</span>
<span class="k">end</span>
</pre></div>
</div>
<p>the compiler will not generally know the type of <tt class="docutils literal"><span class="pre">foo.field</span></tt>, since it
might be modified at any time to refer to a value of a different type.
It will help to declare the most specific type possible, such as
<tt class="docutils literal"><span class="pre">field::Float64</span></tt> or <tt class="docutils literal"><span class="pre">field::Array{Int64,1}</span></tt>.</p>
</div>
<div class="section" id="annotate-values-taken-from-untyped-locations">
<h3>Annotate values taken from untyped locations<a class="headerlink" href="#annotate-values-taken-from-untyped-locations" title="Permalink to this headline">¶</a></h3>
<p>It is often convenient to work with data structures that may contain
values of any type, such as the original <tt class="docutils literal"><span class="pre">Foo</span></tt> type above, or cell
arrays (arrays of type <tt class="docutils literal"><span class="pre">Array{Any}</span></tt>). But, if you&#8217;re using one of
these structures and happen to know the type of an element, it helps to
share this knowledge with the compiler:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> foo</span><span class="p">(</span><span class="n">a</span><span class="p">::</span><span class="n">Array</span><span class="p">{</span><span class="kt">Any</span><span class="p">,</span><span class="mi">1</span><span class="p">})</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">a</span><span class="p">[</span><span class="mi">1</span><span class="p">]::</span><span class="kt">Int32</span>
    <span class="n">b</span> <span class="o">=</span> <span class="n">x</span><span class="o">+</span><span class="mi">1</span>
    <span class="o">...</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Here, we happened to know that the first element of <tt class="docutils literal"><span class="pre">a</span></tt> would be an
<tt class="docutils literal"><span class="pre">Int32</span></tt>. Making an annotation like this has the added benefit that it
will raise a run-time error if the value is not of the expected type,
potentially catching certain bugs earlier.</p>
</div>
<div class="section" id="declare-types-of-keyword-arguments">
<h3>Declare types of keyword arguments<a class="headerlink" href="#declare-types-of-keyword-arguments" title="Permalink to this headline">¶</a></h3>
<p>Keyword arguments can have declared types:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> with_keyword</span><span class="p">(</span><span class="n">x</span><span class="p">;</span> <span class="n">name</span><span class="p">::</span><span class="kt">Int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span>
    <span class="o">...</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Functions are specialized on the types of keyword arguments, so these
declarations will not affect performance of code inside the function.
However, they will reduce the overhead of calls to the function that
include keyword arguments.</p>
<p>Functions with keyword arguments have near-zero overhead for call sites
that pass only positional arguments.</p>
<p>Passing dynamic lists of keyword arguments, as in <tt class="docutils literal"><span class="pre">f(x;</span> <span class="pre">keywords...)</span></tt>,
can be slow and should be avoided in performance-sensitive code.</p>
</div>
</div>
<div class="section" id="break-functions-into-multiple-definitions">
<h2>Break functions into multiple definitions<a class="headerlink" href="#break-functions-into-multiple-definitions" title="Permalink to this headline">¶</a></h2>
<p>Writing a function as many small definitions allows the compiler to
directly call the most applicable code, or even inline it.</p>
<p>Here is an example of a &#8220;compound function&#8221; that should really be
written as multiple definitions:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> norm</span><span class="p">(</span><span class="n">A</span><span class="p">)</span>
    <span class="k">if</span> <span class="nb">isa</span><span class="p">(</span><span class="n">A</span><span class="p">,</span> <span class="n">Vector</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">sqrt</span><span class="p">(</span><span class="n">real</span><span class="p">(</span><span class="n">dot</span><span class="p">(</span><span class="n">A</span><span class="p">,</span><span class="n">A</span><span class="p">)))</span>
    <span class="k">elseif</span> <span class="nb">isa</span><span class="p">(</span><span class="n">A</span><span class="p">,</span> <span class="n">Matrix</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">max</span><span class="p">(</span><span class="n">svd</span><span class="p">(</span><span class="n">A</span><span class="p">)[</span><span class="mi">2</span><span class="p">])</span>
    <span class="k">else</span>
        <span class="nb">error</span><span class="p">(</span><span class="s">&quot;norm: invalid argument&quot;</span><span class="p">)</span>
    <span class="k">end</span>
<span class="k">end</span>
</pre></div>
</div>
<p>This can be written more concisely and efficiently as:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">norm</span><span class="p">(</span><span class="n">x</span><span class="p">::</span><span class="n">Vector</span><span class="p">)</span> <span class="o">=</span> <span class="n">sqrt</span><span class="p">(</span><span class="n">real</span><span class="p">(</span><span class="n">dot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">x</span><span class="p">)))</span>
<span class="n">norm</span><span class="p">(</span><span class="n">A</span><span class="p">::</span><span class="n">Matrix</span><span class="p">)</span> <span class="o">=</span> <span class="n">max</span><span class="p">(</span><span class="n">svd</span><span class="p">(</span><span class="n">A</span><span class="p">)[</span><span class="mi">2</span><span class="p">])</span>
</pre></div>
</div>
</div>
<div class="section" id="write-type-stable-functions">
<h2>Write &#8220;type-stable&#8221; functions<a class="headerlink" href="#write-type-stable-functions" title="Permalink to this headline">¶</a></h2>
<p>When possible, it helps to ensure that a function always returns a value
of the same type. Consider the following definition:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">pos</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">=</span> <span class="n">x</span> <span class="o">&lt;</span> <span class="mi">0</span> <span class="o">?</span> <span class="mi">0</span> <span class="p">:</span> <span class="n">x</span>
</pre></div>
</div>
<p>Although this seems innocent enough, the problem is that <tt class="docutils literal"><span class="pre">0</span></tt> is an
integer (of type <tt class="docutils literal"><span class="pre">Int</span></tt>) and <tt class="docutils literal"><span class="pre">x</span></tt> might be of any type. Thus,
depending on the value of <tt class="docutils literal"><span class="pre">x</span></tt>, this function might return a value of
either of two types. This behavior is allowed, and may be desirable in
some cases. But it can easily be fixed as follows:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">pos</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">=</span> <span class="n">x</span> <span class="o">&lt;</span> <span class="mi">0</span> <span class="o">?</span> <span class="n">zero</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="p">:</span> <span class="n">x</span>
</pre></div>
</div>
<p>There is also a <tt class="docutils literal"><span class="pre">one</span></tt> function, and a more general <tt class="docutils literal"><span class="pre">oftype(x,y)</span></tt>
function, which returns <tt class="docutils literal"><span class="pre">y</span></tt> converted to the type of <tt class="docutils literal"><span class="pre">x</span></tt>. The first
argument to any of these functions can be either a value or a type.</p>
</div>
<div class="section" id="avoid-changing-the-type-of-a-variable">
<h2>Avoid changing the type of a variable<a class="headerlink" href="#avoid-changing-the-type-of-a-variable" title="Permalink to this headline">¶</a></h2>
<p>An analogous &#8220;type-stability&#8221; problem exists for variables used
repeatedly within a function:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> foo</span><span class="p">()</span>
    <span class="n">x</span> <span class="o">=</span> <span class="mi">1</span>
    <span class="k">for</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">:</span><span class="mi">10</span>
        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">/</span><span class="n">bar</span><span class="p">()</span>
    <span class="k">end</span>
    <span class="k">return</span> <span class="n">x</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Local variable <tt class="docutils literal"><span class="pre">x</span></tt> starts as an integer, and after one loop iteration
becomes a floating-point number (the result of the <tt class="docutils literal"><span class="pre">/</span></tt> operator). This
makes it more difficult for the compiler to optimize the body of the
loop. There are several possible fixes:</p>
<ul class="simple">
<li>Initialize <tt class="docutils literal"><span class="pre">x</span></tt> with <tt class="docutils literal"><span class="pre">x</span> <span class="pre">=</span> <span class="pre">1.0</span></tt></li>
<li>Declare the type of <tt class="docutils literal"><span class="pre">x</span></tt>: <tt class="docutils literal"><span class="pre">x::Float64</span> <span class="pre">=</span> <span class="pre">1</span></tt></li>
<li>Use an explicit conversion: <tt class="docutils literal"><span class="pre">x</span> <span class="pre">=</span> <span class="pre">one(T)</span></tt></li>
</ul>
</div>
<div class="section" id="separate-kernel-functions">
<h2>Separate kernel functions<a class="headerlink" href="#separate-kernel-functions" title="Permalink to this headline">¶</a></h2>
<p>Many functions follow a pattern of performing some set-up work, and then
running many iterations to perform a core computation. Where possible,
it is a good idea to put these core computations in separate functions.
For example, the following contrived function returns an array of a
randomly-chosen type:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> strange_twos</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
    <span class="n">a</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="n">randbool</span><span class="p">()</span> <span class="o">?</span> <span class="kt">Int64</span> <span class="p">:</span> <span class="kt">Float64</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">:</span><span class="n">n</span>
        <span class="n">a</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="mi">2</span>
    <span class="k">end</span>
    <span class="k">return</span> <span class="n">a</span>
<span class="k">end</span>
</pre></div>
</div>
<p>This should be written as:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> fill_twos</span><span class="o">!</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
        <span class="n">a</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="mi">2</span>
    <span class="k">end</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> strange_twos</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
    <span class="n">a</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="n">randbool</span><span class="p">()</span> <span class="o">?</span> <span class="kt">Int64</span> <span class="p">:</span> <span class="kt">Float64</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
    <span class="n">fill_twos</span><span class="o">!</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">a</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Julia&#8217;s compiler specializes code for argument types at function
boundaries, so in the original implementation it does not know the type
of <tt class="docutils literal"><span class="pre">a</span></tt> during the loop (since it is chosen randomly). Therefore the
second version is generally faster since the inner loop can be
recompiled as part of <tt class="docutils literal"><span class="pre">fill_twos!</span></tt> for different types of <tt class="docutils literal"><span class="pre">a</span></tt>.</p>
<p>The second form is also often better style and can lead to more code
reuse.</p>
<p>This pattern is used in several places in the standard library. For
example, see <tt class="docutils literal"><span class="pre">hvcat_fill</span></tt> in
<a class="reference external" href="https://github.com/JuliaLang/julia/blob/master/base/abstractarray.jl">abstractarray.jl</a>,
or the <tt class="docutils literal"><span class="pre">fill!</span></tt> function, which we could have used instead of writing
our own <tt class="docutils literal"><span class="pre">fill_twos!</span></tt>.</p>
<p>Functions like <tt class="docutils literal"><span class="pre">strange_twos</span></tt> occur when dealing with data of
uncertain type, for example data loaded from an input file that might
contain integers, floats, strings, or something else.</p>
</div>
<div class="section" id="access-arrays-in-memory-order-along-columns">
<h2>Access arrays in memory order, along columns<a class="headerlink" href="#access-arrays-in-memory-order-along-columns" title="Permalink to this headline">¶</a></h2>
<p>Multidimensional arrays in Julia are stored in column-major order. This
means that arrays are stacked one column at a time. This can be verified
using the <tt class="docutils literal"><span class="pre">vec</span></tt> function or the syntax <tt class="docutils literal"><span class="pre">[:]</span></tt> as shown below (notice
that the array is ordered <tt class="docutils literal"><span class="pre">[1</span> <span class="pre">3</span> <span class="pre">2</span> <span class="pre">4]</span></tt>, not <tt class="docutils literal"><span class="pre">[1</span> <span class="pre">2</span> <span class="pre">3</span> <span class="pre">4]</span></tt>):</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">julia</span><span class="o">&gt;</span> <span class="n">x</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span> <span class="mi">2</span><span class="p">;</span> <span class="mi">3</span> <span class="mi">4</span><span class="p">]</span>
<span class="mi">2</span><span class="n">x2</span> <span class="n">Array</span><span class="p">{</span><span class="kt">Int64</span><span class="p">,</span><span class="mi">2</span><span class="p">}:</span>
 <span class="mi">1</span>  <span class="mi">2</span>
 <span class="mi">3</span>  <span class="mi">4</span>

<span class="n">julia</span><span class="o">&gt;</span> <span class="n">x</span><span class="p">[:]</span>
<span class="mi">4</span><span class="o">-</span><span class="n">element</span> <span class="n">Array</span><span class="p">{</span><span class="kt">Int64</span><span class="p">,</span><span class="mi">1</span><span class="p">}:</span>
 <span class="mi">1</span>
 <span class="mi">3</span>
 <span class="mi">2</span>
 <span class="mi">4</span>
</pre></div>
</div>
<p>This convention for ordering arrays is common in many languages like
Fortran, Matlab, and R (to name a few). The alternative to column-major
ordering is row-major ordering, which is the convention adopted by C and
Python (<tt class="docutils literal"><span class="pre">numpy</span></tt>) among other languages. Remembering the ordering of
arrays can have significant performance effects when looping over
arrays. A rule of thumb to keep in mind is that with column-major
arrays, the first index changes most rapidly. Essentially this means
that looping will be faster if the inner-most loop index is the first to
appear in a slice expression.</p>
<p>Consider the following contrived example. Imagine we wanted to write a
function that accepts a <tt class="docutils literal"><span class="pre">Vector</span></tt> and returns a square <tt class="docutils literal"><span class="pre">Matrix</span></tt>
with either the rows or the columns filled with copies of the input
vector. Assume that it is not important whether rows or columns are
filled with these copies (perhaps the rest of the code can be easily
adapted accordingly). We could conceivably do this in at least four ways
(in addition to the recommended call to the built-in function
<tt class="docutils literal"><span class="pre">repmat</span></tt>):</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> copy_cols</span><span class="p">{</span><span class="n">T</span><span class="p">}(</span><span class="n">x</span><span class="p">::</span><span class="n">Vector</span><span class="p">{</span><span class="n">T</span><span class="p">})</span>
    <span class="n">n</span> <span class="o">=</span> <span class="n">size</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="n">out</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="n">eltype</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">n</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">n</span>
        <span class="n">out</span><span class="p">[:,</span> <span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span>
    <span class="k">end</span>
    <span class="n">out</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> copy_rows</span><span class="p">{</span><span class="n">T</span><span class="p">}(</span><span class="n">x</span><span class="p">::</span><span class="n">Vector</span><span class="p">{</span><span class="n">T</span><span class="p">})</span>
    <span class="n">n</span> <span class="o">=</span> <span class="n">size</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="n">out</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="n">eltype</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">n</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">n</span>
        <span class="n">out</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="p">:]</span> <span class="o">=</span> <span class="n">x</span>
    <span class="k">end</span>
    <span class="n">out</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> copy_col_row</span><span class="p">{</span><span class="n">T</span><span class="p">}(</span><span class="n">x</span><span class="p">::</span><span class="n">Vector</span><span class="p">{</span><span class="n">T</span><span class="p">})</span>
    <span class="n">n</span> <span class="o">=</span> <span class="n">size</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="n">out</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">col</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">n</span><span class="p">,</span> <span class="n">row</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">n</span>
        <span class="n">out</span><span class="p">[</span><span class="n">row</span><span class="p">,</span> <span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="n">row</span><span class="p">]</span>
    <span class="k">end</span>
    <span class="n">out</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> copy_row_col</span><span class="p">{</span><span class="n">T</span><span class="p">}(</span><span class="n">x</span><span class="p">::</span><span class="n">Vector</span><span class="p">{</span><span class="n">T</span><span class="p">})</span>
    <span class="n">n</span> <span class="o">=</span> <span class="n">size</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
    <span class="n">out</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
    <span class="k">for</span> <span class="n">row</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">n</span><span class="p">,</span> <span class="n">col</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">n</span>
        <span class="n">out</span><span class="p">[</span><span class="n">row</span><span class="p">,</span> <span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="n">col</span><span class="p">]</span>
    <span class="k">end</span>
    <span class="n">out</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Now we will time each of these functions using the same random <tt class="docutils literal"><span class="pre">10000</span></tt>
by <tt class="docutils literal"><span class="pre">1</span></tt> input vector:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">julia</span><span class="o">&gt;</span> <span class="n">x</span> <span class="o">=</span> <span class="n">randn</span><span class="p">(</span><span class="mi">10000</span><span class="p">);</span>

<span class="n">julia</span><span class="o">&gt;</span> <span class="n">fmt</span><span class="p">(</span><span class="n">f</span><span class="p">)</span> <span class="o">=</span> <span class="n">println</span><span class="p">(</span><span class="n">rpad</span><span class="p">(</span><span class="n">string</span><span class="p">(</span><span class="n">f</span><span class="p">)</span><span class="o">*</span><span class="s">&quot;: &quot;</span><span class="p">,</span> <span class="mi">14</span><span class="p">,</span> <span class="sc">&#39; &#39;</span><span class="p">),</span> <span class="p">@</span><span class="n">elapsed</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>

<span class="n">julia</span><span class="o">&gt;</span> <span class="n">map</span><span class="p">(</span><span class="n">fmt</span><span class="p">,</span> <span class="p">{</span><span class="n">copy_cols</span><span class="p">,</span> <span class="n">copy_rows</span><span class="p">,</span> <span class="n">copy_col_row</span><span class="p">,</span> <span class="n">copy_row_col</span><span class="p">});</span>
<span class="n">copy_cols</span><span class="p">:</span>    <span class="mf">0.331706323</span>
<span class="n">copy_rows</span><span class="p">:</span>    <span class="mf">1.799009911</span>
<span class="n">copy_col_row</span><span class="p">:</span> <span class="mf">0.415630047</span>
<span class="n">copy_row_col</span><span class="p">:</span> <span class="mf">1.721531501</span>
</pre></div>
</div>
<p>Notice that <tt class="docutils literal"><span class="pre">copy_cols</span></tt> is much faster than <tt class="docutils literal"><span class="pre">copy_rows</span></tt>. This is
expected because <tt class="docutils literal"><span class="pre">copy_cols</span></tt> respects the column-based memory layout
of the <tt class="docutils literal"><span class="pre">Matrix</span></tt> and fills it one column at a time. Additionally,
<tt class="docutils literal"><span class="pre">copy_col_row</span></tt> is much faster than <tt class="docutils literal"><span class="pre">copy_row_col</span></tt> because it follows
our rule of thumb that the first element to appear in a slice expression
should be coupled with the inner-most loop.</p>
</div>
<div class="section" id="pre-allocating-outputs">
<span id="man-preallocation"></span><h2>Pre-allocating outputs<a class="headerlink" href="#pre-allocating-outputs" title="Permalink to this headline">¶</a></h2>
<p>If your function returns an Array or some other complex
type, it may have to allocate memory.  Unfortunately, oftentimes
allocation and its converse, garbage collection, are substantial
bottlenecks.</p>
<p>Sometimes you can circumvent the need to allocate memory on each
function call by pre-allocating the output.  As a
trivial example, compare</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> xinc</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
    <span class="k">return</span> <span class="p">[</span><span class="n">x</span><span class="p">,</span> <span class="n">x</span><span class="o">+</span><span class="mi">1</span><span class="p">,</span> <span class="n">x</span><span class="o">+</span><span class="mi">2</span><span class="p">]</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> loopinc</span><span class="p">()</span>
    <span class="n">y</span> <span class="o">=</span> <span class="mi">0</span>
    <span class="k">for</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">:</span><span class="mi">10</span><span class="o">^</span><span class="mi">7</span>
        <span class="n">ret</span> <span class="o">=</span> <span class="n">xinc</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
        <span class="n">y</span> <span class="o">+=</span> <span class="n">ret</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
    <span class="k">end</span>
    <span class="n">y</span>
<span class="k">end</span>
</pre></div>
</div>
<p>with</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> xinc</span><span class="o">!</span><span class="p">{</span><span class="n">T</span><span class="p">}(</span><span class="n">ret</span><span class="p">::</span><span class="n">AbstractVector</span><span class="p">{</span><span class="n">T</span><span class="p">},</span> <span class="n">x</span><span class="p">::</span><span class="n">T</span><span class="p">)</span>
    <span class="n">ret</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span>
    <span class="n">ret</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span><span class="o">+</span><span class="mi">1</span>
    <span class="n">ret</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">=</span> <span class="n">x</span><span class="o">+</span><span class="mi">2</span>
    <span class="n">nothing</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> loopinc_prealloc</span><span class="p">()</span>
    <span class="n">ret</span> <span class="o">=</span> <span class="n">Array</span><span class="p">(</span><span class="kt">Int</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
    <span class="n">y</span> <span class="o">=</span> <span class="mi">0</span>
    <span class="k">for</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">:</span><span class="mi">10</span><span class="o">^</span><span class="mi">7</span>
        <span class="n">xinc</span><span class="o">!</span><span class="p">(</span><span class="n">ret</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span>
        <span class="n">y</span> <span class="o">+=</span> <span class="n">ret</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
    <span class="k">end</span>
    <span class="n">y</span>
<span class="k">end</span>
</pre></div>
</div>
<p>Timing results:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">loopinc</span><span class="p">()</span>
<span class="n">elapsed</span> <span class="n">time</span><span class="p">:</span> <span class="mf">1.955026528</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">1279975584</span> <span class="n">bytes</span> <span class="n">allocated</span><span class="p">)</span>
<span class="mi">50000015000000</span>

<span class="n">julia</span><span class="o">&gt;</span> <span class="p">@</span><span class="n">time</span> <span class="n">loopinc_prealloc</span><span class="p">()</span>
<span class="n">elapsed</span> <span class="n">time</span><span class="p">:</span> <span class="mf">0.078639163</span> <span class="n">seconds</span> <span class="p">(</span><span class="mi">144</span> <span class="n">bytes</span> <span class="n">allocated</span><span class="p">)</span>
<span class="mi">50000015000000</span>
</pre></div>
</div>
<p>Pre-allocation has other advantages; for example, it allows the
caller to control the &#8220;output&#8221; type from an algorithm.  In the example
above, we could have passed a <tt class="docutils literal"><span class="pre">SubArray</span></tt> rather than an <tt class="docutils literal"><span class="pre">Array</span></tt>,
had we so desired.</p>
<p>Taken to its extreme, pre-allocation can make your code uglier, so
performance measurements and some judgment may be required.</p>
</div>
<div class="section" id="avoid-string-interpolation-for-i-o">
<h2>Avoid string interpolation for I/O<a class="headerlink" href="#avoid-string-interpolation-for-i-o" title="Permalink to this headline">¶</a></h2>
<p>When writing data to a file (or other I/O device), forming extra
intermediate strings is a source of overhead. Instead of:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">println</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="s">&quot;</span><span class="si">$</span><span class="s">a </span><span class="si">$</span><span class="s">b&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>use:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">println</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">a</span><span class="p">,</span> <span class="s">&quot; &quot;</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span>
</pre></div>
</div>
<p>The first version of the code forms a string, then writes it
to the file, while the second version writes values directly
to the file. Also notice that in some cases string interpolation can
be harder to read. Consider:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">println</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="s">&quot;</span><span class="si">$</span><span class="s">(f(a))</span><span class="si">$</span><span class="s">(f(b))&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>versus:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">println</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">f</span><span class="p">(</span><span class="n">a</span><span class="p">),</span> <span class="n">f</span><span class="p">(</span><span class="n">b</span><span class="p">))</span>
</pre></div>
</div>
</div>
<div class="section" id="fix-deprecation-warnings">
<h2>Fix deprecation warnings<a class="headerlink" href="#fix-deprecation-warnings" title="Permalink to this headline">¶</a></h2>
<p>A deprecated function internally performs a lookup in order to
print a relevant warning only once. This extra lookup can cause a
significant slowdown, so all uses of deprecated functions should be
modified as suggested by the warnings.</p>
</div>
<div class="section" id="tweaks">
<h2>Tweaks<a class="headerlink" href="#tweaks" title="Permalink to this headline">¶</a></h2>
<p>These are some minor points that might help in tight inner loops.</p>
<ul class="simple">
<li>Avoid unnecessary arrays. For example, instead of <tt class="docutils literal"><span class="pre">sum([x,y,z])</span></tt>
use <tt class="docutils literal"><span class="pre">x+y+z</span></tt>.</li>
<li>Use <tt class="docutils literal"><span class="pre">*</span></tt> instead of raising to small integer powers, for example
<tt class="docutils literal"><span class="pre">x*x*x</span></tt> instead of <tt class="docutils literal"><span class="pre">x^3</span></tt>.</li>
<li>Use <tt class="docutils literal"><span class="pre">abs2(z)</span></tt> instead of <tt class="docutils literal"><span class="pre">abs(z)^2</span></tt> for complex <tt class="docutils literal"><span class="pre">z</span></tt>. In general,
try to rewrite code to use <tt class="docutils literal"><span class="pre">abs2</span></tt> instead of <tt class="docutils literal"><span class="pre">abs</span></tt> for complex arguments.</li>
<li>Use <tt class="docutils literal"><span class="pre">div(x,y)</span></tt> for truncating division of integers instead of
<tt class="docutils literal"><span class="pre">trunc(x/y)</span></tt>, and <tt class="docutils literal"><span class="pre">fld(x,y)</span></tt> instead of <tt class="docutils literal"><span class="pre">floor(x/y)</span></tt>.</li>
</ul>
</div>
<div class="section" id="performance-annotations">
<h2>Performance Annotations<a class="headerlink" href="#performance-annotations" title="Permalink to this headline">¶</a></h2>
<p>Sometimes you can enable better optimization by promising certain program
properties.</p>
<ul class="simple">
<li>Use <tt class="docutils literal"><span class="pre">&#64;inbounds</span></tt> to eliminate array bounds checking within expressions.
Be certain before doing this. If the subscripts are ever out of bounds,
you may suffer crashes or silent corruption.</li>
<li>Write <tt class="docutils literal"><span class="pre">&#64;simd</span></tt> in front of <tt class="docutils literal"><span class="pre">for</span></tt> loops that are amenable to vectorization.
<strong>This feature is experimental</strong> and could change or disappear in future
versions of Julia.</li>
</ul>
<p>Here is an example with both forms of markup:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="k">function</span><span class="nf"> inner</span><span class="p">(</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span> <span class="p">)</span>
    <span class="n">s</span> <span class="o">=</span> <span class="n">zero</span><span class="p">(</span><span class="n">eltype</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
    <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
        <span class="p">@</span><span class="n">inbounds</span> <span class="n">s</span> <span class="o">+=</span> <span class="n">x</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">*</span><span class="n">y</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
    <span class="k">end</span>
    <span class="n">s</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> innersimd</span><span class="p">(</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span> <span class="p">)</span>
    <span class="n">s</span> <span class="o">=</span> <span class="n">zero</span><span class="p">(</span><span class="n">eltype</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
    <span class="p">@</span><span class="n">simd</span> <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
        <span class="p">@</span><span class="n">inbounds</span> <span class="n">s</span> <span class="o">+=</span> <span class="n">x</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">*</span><span class="n">y</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
    <span class="k">end</span>
    <span class="n">s</span>
<span class="k">end</span>

<span class="k">function</span><span class="nf"> timeit</span><span class="p">(</span> <span class="n">n</span><span class="p">,</span> <span class="n">reps</span> <span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">rand</span><span class="p">(</span><span class="kt">Float32</span><span class="p">,</span><span class="n">n</span><span class="p">)</span>
    <span class="n">y</span> <span class="o">=</span> <span class="n">rand</span><span class="p">(</span><span class="kt">Float32</span><span class="p">,</span><span class="n">n</span><span class="p">)</span>
    <span class="n">s</span> <span class="o">=</span> <span class="n">zero</span><span class="p">(</span><span class="kt">Float64</span><span class="p">)</span>
    <span class="n">time</span> <span class="o">=</span> <span class="p">@</span><span class="n">elapsed</span> <span class="k">for</span> <span class="n">j</span> <span class="k">in</span> <span class="mi">1</span><span class="p">:</span><span class="n">reps</span>
        <span class="n">s</span><span class="o">+=</span><span class="n">inner</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">)</span>
    <span class="k">end</span>
    <span class="n">println</span><span class="p">(</span><span class="s">&quot;GFlop        = &quot;</span><span class="p">,</span><span class="mf">2.0</span><span class="o">*</span><span class="n">n</span><span class="o">*</span><span class="n">reps</span><span class="o">/</span><span class="n">time</span><span class="o">*</span><span class="mf">1E-9</span><span class="p">)</span>
    <span class="n">time</span> <span class="o">=</span> <span class="p">@</span><span class="n">elapsed</span> <span class="k">for</span> <span class="n">j</span> <span class="k">in</span> <span class="mi">1</span><span class="p">:</span><span class="n">reps</span>
        <span class="n">s</span><span class="o">+=</span><span class="n">innersimd</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">)</span>
    <span class="k">end</span>
    <span class="n">println</span><span class="p">(</span><span class="s">&quot;GFlop (SIMD) = &quot;</span><span class="p">,</span><span class="mf">2.0</span><span class="o">*</span><span class="n">n</span><span class="o">*</span><span class="n">reps</span><span class="o">/</span><span class="n">time</span><span class="o">*</span><span class="mf">1E-9</span><span class="p">)</span>
<span class="k">end</span>

<span class="n">timeit</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span><span class="mi">1000</span><span class="p">)</span>
</pre></div>
</div>
<p>On a computer with a 2.4GHz Intel Core i5 processor, this produces:</p>
<div class="highlight-julia"><div class="highlight"><pre><span class="n">GFlop</span>        <span class="o">=</span> <span class="mf">1.9467069505224963</span>
<span class="n">GFlop</span> <span class="p">(</span><span class="n">SIMD</span><span class="p">)</span> <span class="o">=</span> <span class="mf">17.578554163920018</span>
</pre></div>
</div>
<p>The range for a <tt class="docutils literal"><span class="pre">&#64;simd</span> <span class="pre">for</span></tt> loop should be a one-dimensional range.
A variable used for accumulating, such as <tt class="docutils literal"><span class="pre">s</span></tt> in the example, is called
a <em>reduction variable</em>. By using <tt class="docutils literal"><span class="pre">&#64;simd</span></tt>, you are asserting several
properties of the loop:</p>
<ul class="simple">
<li>It is safe to execute iterations in arbitrary or overlapping order,
with special consideration for reduction variables.</li>
<li>Floating-point operations on reduction variables can be reordered,
possibly causing different results than without <tt class="docutils literal"><span class="pre">&#64;simd</span></tt>.</li>
<li>No iteration ever waits on another iteration to make forward progress.</li>
</ul>
<p>A <tt class="docutils literal"><span class="pre">break</span></tt>, <tt class="docutils literal"><span class="pre">continue</span></tt>, or <tt class="docutils literal"><span class="pre">goto</span></tt> in an <tt class="docutils literal"><span class="pre">&#64;simd</span></tt> loop may cause
wrong results.</p>
<p>Using <tt class="docutils literal"><span class="pre">&#64;simd</span></tt> merely gives the compiler license to vectorize. Whether
it actually does so depends on the compiler. To actually benefit from the
current implementation, your loop should have the following additional
properties:</p>
<ul class="simple">
<li>The loop must be an innermost loop.</li>
<li>The loop body must be straight-line code. This is why <tt class="docutils literal"><span class="pre">&#64;inbounds</span></tt> is
currently needed for all array accesses. The compiler can sometimes turn
short <tt class="docutils literal"><span class="pre">&amp;&amp;</span></tt>, <tt class="docutils literal"><span class="pre">||</span></tt>, and <tt class="docutils literal"><span class="pre">?:</span></tt> expressions into straight-line code,
if it is safe to evaluate all operands unconditionally. Consider using
<tt class="docutils literal"><span class="pre">ifelse</span></tt> instead of <tt class="docutils literal"><span class="pre">?:</span></tt> in the loop if it is safe to do so.</li>
<li>Accesses must have a stride pattern and cannot be &#8220;gathers&#8221; (random-index reads)
or &#8220;scatters&#8221; (random-index writes).</li>
<li>The stride should be unit stride.</li>
<li>In some simple cases, for example with 2-3 arrays accessed in a loop, the
LLVM auto-vectorization may kick in automatically, leading to no further
speedup with <tt class="docutils literal"><span class="pre">&#64;simd</span></tt>.</li>
</ul>
</div>
</div>


          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="style-guide.html" class="btn btn-neutral float-right" title="Style Guide">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="packages.html" class="btn btn-neutral" title="Packages"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
    </p>
  </div>

  <a href="https://github.com/snide/sphinx_rtd_theme">Sphinx theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>
</footer>
        </div>
      </div>

    </section>

  </div>
  

  

    <script type="text/javascript">
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'../',
            VERSION:'0.3.4',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
            HAS_SOURCE:  true
        };
    </script>
      <script type="text/javascript" src="../_static/jquery.js"></script>
      <script type="text/javascript" src="../_static/underscore.js"></script>
      <script type="text/javascript" src="../_static/doctools.js"></script>
      <script type="text/javascript" src="http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>

  

  
  
    <script type="text/javascript" src="../_static/js/theme.js"></script>
  

  
  
  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.StickyNav.enable();
      });
  </script>
   

</body>
</html>