<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <title>tables.index — PyTables 3.0.0 documentation</title> <link rel="stylesheet" href="../../_static/cloud.css" type="text/css" /> <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> <link rel="stylesheet" href="../../" type="text/css" /> <script type="text/javascript"> var DOCUMENTATION_OPTIONS = { URL_ROOT: '../../', VERSION: '3.0.0', COLLAPSE_INDEX: false, FILE_SUFFIX: '.html', HAS_SOURCE: true }; </script> <script type="text/javascript" src="../../_static/jquery.js"></script> <script type="text/javascript" src="../../_static/underscore.js"></script> <script type="text/javascript" src="../../_static/doctools.js"></script> <script type="text/javascript" src="../../_static/jquery.cookie.js"></script> <script type="text/javascript" src="../../_static/toggle_sections.js"></script> <script type="text/javascript" src="../../_static/toggle_sidebar.js"></script> <link rel="shortcut icon" href="../../_static/favicon.ico"/> <link rel="top" title="PyTables 3.0.0 documentation" href="../../index.html" /> <link rel="up" title="tables" href="../tables.html" /> </head> <body> <div class="relbar-top"> <div class="related"> <h3>Navigation</h3> <ul> <li class="right" style="margin-right: 10px"> <a href="../../genindex.html" title="General Index" accesskey="I">index</a></li> <li class="right" > <a href="../../py-modindex.html" title="Python Module Index" >modules</a> </li> <li class="right" > <a href="../../np-modindex.html" title="Python Module Index" >modules</a> </li> <li><a href="../../index.html">PyTables 3.0.0 documentation</a> »</li> <li><a href="../index.html" >Module code</a> »</li> <li><a href="../tables.html" accesskey="U">tables</a> »</li> </ul> </div> </div> <div class="document"> <div 
class="documentwrapper"> <div class="bodywrapper"> <div class="body"> <h1>Source code for tables.index</h1><div class="highlight"><pre> <span class="c"># -*- coding: utf-8 -*-</span> <span class="c">#######################################################################</span> <span class="c">#</span> <span class="c"># License: BSD</span> <span class="c"># Created: June 08, 2004</span> <span class="c"># Author: Francesc Alted - faltet@pytables.com</span> <span class="c">#</span> <span class="c"># $Id$</span> <span class="c">#</span> <span class="c">########################################################################</span> <span class="sd">"""Here is defined the Index class."""</span> <span class="kn">import</span> <span class="nn">sys</span> <span class="kn">from</span> <span class="nn">bisect</span> <span class="kn">import</span> <span class="n">bisect_left</span><span class="p">,</span> <span class="n">bisect_right</span> <span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span><span class="p">,</span> <span class="n">clock</span> <span class="kn">import</span> <span class="nn">os</span> <span class="kn">import</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="nn">tempfile</span> <span class="kn">import</span> <span class="nn">math</span> <span class="kn">import</span> <span class="nn">warnings</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">from</span> <span class="nn">tables.idxutils</span> <span class="kn">import</span> <span class="p">(</span><span class="n">calc_chunksize</span><span class="p">,</span> <span class="n">calcoptlevels</span><span class="p">,</span> <span class="n">get_reduction_level</span><span class="p">,</span> <span class="n">nextafter</span><span class="p">,</span> <span class="n">inftype</span><span class="p">)</span> <span class="kn">from</span> <span class="nn">tables</span> <span class="kn">import</span> 
<span class="n">indexesextension</span> <span class="kn">from</span> <span class="nn">tables.node</span> <span class="kn">import</span> <span class="n">NotLoggedMixin</span> <span class="kn">from</span> <span class="nn">tables.atom</span> <span class="kn">import</span> <span class="n">UIntAtom</span><span class="p">,</span> <span class="n">Atom</span> <span class="kn">from</span> <span class="nn">tables.earray</span> <span class="kn">import</span> <span class="n">EArray</span> <span class="kn">from</span> <span class="nn">tables.carray</span> <span class="kn">import</span> <span class="n">CArray</span> <span class="kn">from</span> <span class="nn">tables.leaf</span> <span class="kn">import</span> <span class="n">Filters</span> <span class="kn">from</span> <span class="nn">tables.indexes</span> <span class="kn">import</span> <span class="n">CacheArray</span><span class="p">,</span> <span class="n">LastRowArray</span><span class="p">,</span> <span class="n">IndexArray</span> <span class="kn">from</span> <span class="nn">tables.group</span> <span class="kn">import</span> <span class="n">Group</span> <span class="kn">from</span> <span class="nn">tables.path</span> <span class="kn">import</span> <span class="n">join_path</span> <span class="kn">from</span> <span class="nn">tables.exceptions</span> <span class="kn">import</span> <span class="n">PerformanceWarning</span> <span class="kn">from</span> <span class="nn">tables.utils</span> <span class="kn">import</span> <span class="n">is_idx</span><span class="p">,</span> <span class="n">idx2long</span><span class="p">,</span> <span class="n">lazyattr</span> <span class="kn">from</span> <span class="nn">tables.lrucacheextension</span> <span class="kn">import</span> <span class="n">ObjectCache</span> <span class="kn">from</span> <span class="nn">tables._past</span> <span class="kn">import</span> <span class="n">previous_api</span><span class="p">,</span> <span class="n">previous_api_property</span> <span class="c"># default 
version for INDEX objects</span> <span class="c"># obversion = "1.0" # Version of indexes in PyTables 1.x series</span> <span class="c"># obversion = "2.0" # Version of indexes in PyTables Pro 2.0 series</span> <span class="n">obversion</span> <span class="o">=</span> <span class="s">"2.1"</span> <span class="c"># Version of indexes in PyTables Pro 2.1 and up series,</span> <span class="c"># including the join 2.3 Std + Pro version</span> <span class="n">debug</span> <span class="o">=</span> <span class="bp">False</span> <span class="c"># debug = True # Uncomment this for printing sizes purposes</span> <span class="n">profile</span> <span class="o">=</span> <span class="bp">False</span> <span class="c"># profile = True # Uncomment for profiling</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="kn">from</span> <span class="nn">tables.utils</span> <span class="kn">import</span> <span class="n">show_stats</span> <span class="c"># The default method for sorting</span> <span class="n">defsort</span> <span class="o">=</span> <span class="s">"quicksort"</span> <span class="c"># defsort = "mergesort"</span> <span class="c"># Default policy for automatically updating indexes after a table</span> <span class="c"># append operation, or automatically reindexing after an</span> <span class="c"># index-invalidating operation like removing or modifying table rows.</span> <span class="n">default_auto_index</span> <span class="o">=</span> <span class="bp">True</span> <span class="c"># Keep in sync with ``Table.autoindex`` docstring.</span> <span class="c"># Default filters used to compress indexes. 
This is quite fast and</span> <span class="c"># compression is pretty good.</span> <span class="c"># Remember to keep these defaults in sync with the docstrings and UG.</span> <span class="n">default_index_filters</span> <span class="o">=</span> <span class="n">Filters</span><span class="p">(</span><span class="n">complevel</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">complib</span><span class="o">=</span><span class="s">'zlib'</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">fletcher32</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span> <span class="c"># Deprecated API</span> <span class="n">defaultAutoIndex</span> <span class="o">=</span> <span class="n">default_auto_index</span> <span class="n">defaultIndexFilters</span> <span class="o">=</span> <span class="n">default_index_filters</span> <span class="c"># The list of types for which an optimised search in cython and C has</span> <span class="c"># been implemented. 
Always add here the name of a new optimised type.</span> <span class="n">opt_search_types</span> <span class="o">=</span> <span class="p">(</span><span class="s">"int8"</span><span class="p">,</span> <span class="s">"int16"</span><span class="p">,</span> <span class="s">"int32"</span><span class="p">,</span> <span class="s">"int64"</span><span class="p">,</span> <span class="s">"uint8"</span><span class="p">,</span> <span class="s">"uint16"</span><span class="p">,</span> <span class="s">"uint32"</span><span class="p">,</span> <span class="s">"uint64"</span><span class="p">,</span> <span class="s">"float32"</span><span class="p">,</span> <span class="s">"float64"</span><span class="p">)</span> <span class="c"># The upper limit for uint32 ints</span> <span class="n">max32</span> <span class="o">=</span> <span class="mi">2</span><span class="o">**</span><span class="mi">32</span> <span class="k">def</span> <span class="nf">_table_column_pathname_of_index</span><span class="p">(</span><span class="n">indexpathname</span><span class="p">):</span> <span class="n">names</span> <span class="o">=</span> <span class="n">indexpathname</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s">"/"</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">names</span><span class="p">):</span> <span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s">'_i_'</span><span class="p">):</span> <span class="k">break</span> <span class="n">tablepathname</span> <span class="o">=</span> <span class="s">"/"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">[:</span><span class="n">i</span><span class="p">])</span> 
<span class="o">+</span> <span class="s">"/"</span> <span class="o">+</span> <span class="n">name</span><span class="p">[</span><span class="mi">3</span><span class="p">:]</span> <span class="n">colpathname</span> <span class="o">=</span> <span class="s">"/"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:])</span> <span class="k">return</span> <span class="p">(</span><span class="n">tablepathname</span><span class="p">,</span> <span class="n">colpathname</span><span class="p">)</span> <span class="n">_tableColumnPathnameOfIndex</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">_table_column_pathname_of_index</span><span class="p">)</span> <div class="viewcode-block" id="Index"><a class="viewcode-back" href="../../usersguide/libref/helper_classes.html#tables.index.Index">[docs]</a><span class="k">class</span> <span class="nc">Index</span><span class="p">(</span><span class="n">NotLoggedMixin</span><span class="p">,</span> <span class="n">indexesextension</span><span class="o">.</span><span class="n">Index</span><span class="p">,</span> <span class="n">Group</span><span class="p">):</span> <span class="sd">"""Represents the index of a column in a table.</span> <span class="sd"> This class is used to keep the indexing information for columns in a Table</span> <span class="sd"> dataset (see :ref:`TableClassDescr`). It is actually a descendant of the</span> <span class="sd"> Group class (see :ref:`GroupClassDescr`), with some added functionality. An</span> <span class="sd"> Index is always associated with one and only one column in the table.</span> <span class="sd"> .. 
note::</span> <span class="sd"> This class is mainly intended for internal use, but some of its</span> <span class="sd"> documented attributes and methods may be interesting for the</span> <span class="sd"> programmer.</span> <span class="sd"> Parameters</span> <span class="sd"> ----------</span> <span class="sd"> parentnode</span> <span class="sd"> The parent :class:`Group` object.</span> <span class="sd"> .. versionchanged:: 3.0</span> <span class="sd"> Renamed from *parentNode* to *parentnode*.</span> <span class="sd"> name : str</span> <span class="sd"> The name of this node in its parent group.</span> <span class="sd"> atom : Atom</span> <span class="sd"> An Atom object representing the shape and type of the atomic objects to</span> <span class="sd"> be saved. Only scalar atoms are supported.</span> <span class="sd"> title</span> <span class="sd"> Sets a TITLE attribute of the Index entity.</span> <span class="sd"> kind</span> <span class="sd"> The desired kind for this index. The 'full' kind specifies a complete</span> <span class="sd"> track of the row position (64-bit), while the 'medium', 'light' or</span> <span class="sd"> 'ultralight' kinds only specify in which chunk the row is (using</span> <span class="sd"> 32-bit, 16-bit and 8-bit respectively).</span> <span class="sd"> optlevel</span> <span class="sd"> The desired optimization level for this index.</span> <span class="sd"> filters : Filters</span> <span class="sd"> An instance of the Filters class that provides information about the</span> <span class="sd"> desired I/O filters to be applied during the life of this object.</span> <span class="sd"> tmp_dir</span> <span class="sd"> The directory for the temporary files.</span> <span class="sd"> expectedrows</span> <span class="sd"> Represents an user estimate about the number of row slices that will be</span> <span class="sd"> added to the growable dimension in the IndexArray object.</span> <span class="sd"> byteorder</span> <span class="sd"> The 
byteorder of the index datasets *on-disk*.</span> <span class="sd"> blocksizes</span> <span class="sd"> The four main sizes of the compound blocks in index datasets (a low</span> <span class="sd"> level parameter).</span> <span class="sd"> """</span> <span class="n">_c_classid</span> <span class="o">=</span> <span class="s">'INDEX'</span> <span class="n">_c_classId</span> <span class="o">=</span> <span class="n">previous_api_property</span><span class="p">(</span><span class="s">'_c_classid'</span><span class="p">)</span> <span class="c"># &lt;properties&gt;</span> <span class="n">kind</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="p">{</span><span class="mi">1</span><span class="p">:</span> <span class="s">'ultralight'</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s">'light'</span><span class="p">,</span> <span class="mi">4</span><span class="p">:</span> <span class="s">'medium'</span><span class="p">,</span> <span class="mi">8</span><span class="p">:</span> <span class="s">'full'</span><span class="p">}[</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">],</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The kind of this index."</span><span class="p">)</span> <span class="n">filters</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_filters</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="sd">"""Filter properties for this index - see Filters in</span> <span 
class="sd"> :ref:`FiltersClassDescr`."""</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_getdirty</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">if</span> <span class="s">'DIRTY'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="p">:</span> <span class="c"># If there is no ``DIRTY`` attribute, index should be clean.</span> <span class="k">return</span> <span class="bp">False</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">DIRTY</span> <span class="k">def</span> <span class="nf">_setdirty</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dirty</span><span class="p">):</span> <span class="n">wasdirty</span><span class="p">,</span> <span class="n">isdirty</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirty</span><span class="p">,</span> <span class="nb">bool</span><span class="p">(</span><span class="n">dirty</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">DIRTY</span> <span class="o">=</span> <span class="n">dirty</span> <span class="c"># If an *actual* change in dirtiness happens,</span> <span class="c"># notify the condition cache by setting or removing a nail.</span> <span class="n">conditioncache</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="o">.</span><span class="n">_condition_cache</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">wasdirty</span> <span class="ow">and</span> <span class="n">isdirty</span><span class="p">:</span> <span 
class="n">conditioncache</span><span class="o">.</span><span class="n">nail</span><span class="p">()</span> <span class="k">if</span> <span class="n">wasdirty</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">isdirty</span><span class="p">:</span> <span class="n">conditioncache</span><span class="o">.</span><span class="n">unnail</span><span class="p">()</span> <span class="n">dirty</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="n">_getdirty</span><span class="p">,</span> <span class="n">_setdirty</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="sd">"""Whether the index is dirty or not.</span> <span class="sd"> Dirty indexes are out of sync with column data, so they exist but they</span> <span class="sd"> are not usable.</span> <span class="sd"> """</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_getcolumn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="n">tablepath</span><span class="p">,</span> <span class="n">columnpath</span> <span class="o">=</span> <span class="n">_table_column_pathname_of_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_v_pathname</span><span class="p">)</span> <span class="n">table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_file</span><span class="o">.</span><span class="n">_get_node</span><span class="p">(</span><span class="n">tablepath</span><span class="p">)</span> <span class="n">column</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">cols</span><span class="o">.</span><span class="n">_g_col</span><span class="p">(</span><span class="n">columnpath</span><span class="p">)</span> <span class="k">return</span> <span class="n">column</span> <span class="n">column</span> 
<span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">_getcolumn</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="sd">"""The Column (see :ref:`ColumnClassDescr`) instance for the indexed</span> <span class="sd"> column."""</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_gettable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="n">tablepath</span><span class="p">,</span> <span class="n">columnpath</span> <span class="o">=</span> <span class="n">_table_column_pathname_of_index</span><span class="p">(</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_pathname</span><span class="p">)</span> <span class="n">table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_file</span><span class="o">.</span><span class="n">_get_node</span><span class="p">(</span><span class="n">tablepath</span><span class="p">)</span> <span class="k">return</span> <span class="n">table</span> <span class="n">table</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">_gettable</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"Accessor for the `Table` object of this index."</span><span class="p">)</span> <span class="n">nblockssuperblock</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span><span class="p">,</span> <span 
class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The number of blocks in a superblock."</span><span class="p">)</span> <span class="n">nslicesblock</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The number of slices in a block."</span><span class="p">)</span> <span class="n">nchunkslice</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The number of chunks in a slice."</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_g_nsuperblocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="c"># Last row should not be considered as a superblock</span> <span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="n">nblocks</span> <span class="o">=</span> <span class="n">nelements</span> <span class="o">//</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span> <span class="k">if</span> <span class="n">nelements</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">nblocks</span> <span class="o">+=</span> <span class="mi">1</span> <span class="k">return</span> <span class="n">nblocks</span> <span class="n">nsuperblocks</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">_g_nsuperblocks</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The total number of superblocks in index."</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_g_nblocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="c"># Last row should not be considered as a block</span> <span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="n">nblocks</span> <span class="o">=</span> <span class="n">nelements</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="k">if</span> <span class="n">nelements</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">nblocks</span> <span class="o">+=</span> <span class="mi">1</span> <span class="k">return</span> <span class="n">nblocks</span> <span class="n">nblocks</span> <span class="o">=</span> <span class="nb">property</span><span 
class="p">(</span><span class="n">_g_nblocks</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The total number of blocks in index."</span><span class="p">)</span> <span class="n">nslices</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The number of complete slices in index."</span><span class="p">)</span> <span class="n">nchunks</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"The number of complete chunks in index."</span><span class="p">)</span> <span class="n">shape</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nrows</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">),</span> <span class="bp">None</span><span class="p">,</span> <span 
class="bp">None</span><span class="p">,</span> <span class="s">"The shape of this index (in slices and elements)."</span><span class="p">)</span> <span class="n">temp_required</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">></span> <span class="mi">1</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="o">.</span><span class="n">nrows</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">),</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"Whether a temporary file for indexes is required or not."</span><span class="p">)</span> <span class="n">want_complete_sort</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="k">lambda</span> <span class="bp">self</span><span class="p">:</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span> <span class="o">==</span> <span class="mi">9</span><span class="p">),</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"Whether we should try to build a completely sorted index or not."</span><span class="p">)</span> <span class="k">def</span> <span 
class="nf">_is_csi</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="c"># An index with 0 indexed elements is not a CSI one (by definition)</span> <span class="k">return</span> <span class="bp">False</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">&lt;</span> <span class="mi">8</span><span class="p">:</span> <span class="c"># An index that is not full cannot be completely sorted</span> <span class="k">return</span> <span class="bp">False</span> <span class="c"># Try with the 'is_csi' attribute</span> <span class="k">if</span> <span class="s">'is_csi'</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="p">:</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">is_csi</span> <span class="c"># If not, then compute the overlaps manually</span> <span class="c"># (the attribute 'is_csi' will be set there)</span> <span class="bp">self</span><span class="o">.</span><span class="n">compute_overlaps</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">False</span><span class="p">)</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">noverlaps</span> <span class="o">==</span> <span class="mi">0</span> <span class="n">_is_CSI</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">_is_csi</span><span class="p">)</span> <span class="n">is_csi</span> <span class="o">=</span> <span 
class="nb">property</span><span class="p">(</span><span class="n">_is_csi</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="sd">"""Whether the index is completely sorted or not.</span> <span class="sd"> .. versionchanged:: 3.0</span> <span class="sd"> The *is_CSI* property has been renamed into *is_csi*.</span> <span class="sd"> """</span><span class="p">)</span> <span class="n">is_CSI</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">is_csi</span><span class="p">)</span> <span class="nd">@lazyattr</span> <span class="k">def</span> <span class="nf">nrowsinchunk</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""The number of rows that fits in a *table* chunk."""</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="o">.</span><span class="n">chunkshape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="nd">@lazyattr</span> <span class="k">def</span> <span class="nf">lbucket</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""Return the length of a bucket based index type."""</span> <span class="c"># Avoid to set a too large lbucket size (mainly useful for tests)</span> <span class="n">lbucket</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nrowsinchunk</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> <span class="c"># 
For ultra-light, we will never have to keep track of a</span> <span class="c"># bucket outside of a slice.</span> <span class="n">maxnb</span> <span class="o">=</span> <span class="mi">2</span><span class="o">**</span><span class="mi">8</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">></span> <span class="n">maxnb</span> <span class="o">*</span> <span class="n">lbucket</span><span class="p">:</span> <span class="n">lbucket</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">)</span> <span class="o">/</span> <span class="n">maxnb</span><span class="p">))</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> <span class="c"># For light, we will never have to keep track of a</span> <span class="c"># bucket outside of a block.</span> <span class="n">maxnb</span> <span class="o">=</span> <span class="mi">2</span><span class="o">**</span><span class="mi">16</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">></span> <span class="n">maxnb</span> <span class="o">*</span> <span class="n">lbucket</span><span class="p">:</span> <span class="n">lbucket</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span><span class="p">)</span> 
<span class="o">/</span> <span class="n">maxnb</span><span class="p">))</span> <span class="k">else</span><span class="p">:</span> <span class="c"># For medium and full indexes there should not be a need to</span> <span class="c"># increase lbucket</span> <span class="k">pass</span> <span class="k">return</span> <span class="n">lbucket</span> <span class="c"># &lt;/properties&gt;</span> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parentnode</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">atom</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="s">""</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">optlevel</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">filters</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">tmp_dir</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">expectedrows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">blocksizes</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">new</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_version</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""The object version of this index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span> <span 
class="o">=</span> <span class="n">optlevel</span> <span class="sd">"""The optimization level for this index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tmp_dir</span> <span class="sd">"""The directory for the temporary files."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">expectedrows</span> <span class="o">=</span> <span class="n">expectedrows</span> <span class="sd">"""The expected number of items of index arrays."""</span> <span class="k">if</span> <span class="n">byteorder</span> <span class="ow">in</span> <span class="p">[</span><span class="s">"little"</span><span class="p">,</span> <span class="s">"big"</span><span class="p">]:</span> <span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span> <span class="o">=</span> <span class="n">byteorder</span> <span class="k">else</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">byteorder</span> <span class="sd">"""The byteorder of the index datasets."""</span> <span class="k">if</span> <span class="n">atom</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span> <span class="o">=</span> <span class="n">atom</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">base</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">=</span> <span class="n">atom</span><span class="o">.</span><span class="n">type</span> <span class="sd">"""The datatypes to be stored by the sorted index array."""</span> <span class="c">############### Important note ###########################</span> <span class="c"># The 
datatypes saved as index values are NumPy native</span> <span class="c"># types, so we get rid of type metainfo like Time* or Enum*</span> <span class="c"># that belongs to HDF5 types (actually, this metainfo is</span> <span class="c"># not needed for sorting and looking-up purposes).</span> <span class="c">##########################################################</span> <span class="n">indsize</span> <span class="o">=</span> <span class="p">{</span> <span class="s">'ultralight'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s">'light'</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s">'medium'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s">'full'</span><span class="p">:</span> <span class="mi">8</span><span class="p">}[</span><span class="n">kind</span><span class="p">]</span> <span class="k">assert</span> <span class="n">indsize</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">),</span> <span class="s">"indsize should be 1, 2, 4 or 8!"</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">=</span> <span class="n">indsize</span> <span class="sd">"""The itemsize for the indices part of the index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""The total number of slices in the index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""The number of currently indexed rows for this column."""</span> <span class="bp">self</span><span 
class="o">.</span><span class="n">blocksizes</span> <span class="o">=</span> <span class="n">blocksizes</span> <span class="sd">"""The four main sizes of the compound blocks (if specified)."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirtycache</span> <span class="o">=</span> <span class="bp">True</span> <span class="sd">"""Dirty cache (for ranges, bounds &amp; sorted) flag."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""Size of the superblock for this index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""Size of the block for this index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""Size of the slice for this index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""Size of the chunk for this index."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpfilename</span> <span class="o">=</span> <span class="bp">None</span> <span class="sd">"""Filename for temporary bounds."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">opt_search_types</span> <span class="o">=</span> <span class="n">opt_search_types</span> <span class="sd">"""The types for which and optimized search has been implemented."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">noverlaps</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="sd">"""The number of overlaps in an index. 0 means a completely</span> <span class="sd"> sorted index. 
-1 means that this number is not computed yet."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">tprof</span> <span class="o">=</span> <span class="mi">0</span> <span class="sd">"""Time counter for benchmarking purposes."""</span> <span class="kn">from</span> <span class="nn">tables.file</span> <span class="kn">import</span> <span class="n">open_file</span> <span class="bp">self</span><span class="o">.</span><span class="n">_openFile</span> <span class="o">=</span> <span class="n">open_file</span> <span class="sd">"""The `open_file()` function, to avoid a circular import."""</span> <span class="nb">super</span><span class="p">(</span><span class="n">Index</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="n">parentnode</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">new</span><span class="p">,</span> <span class="n">filters</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_g_post_init_hook</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_new</span><span class="p">:</span> <span class="c"># The version for newly created indexes</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_version</span> <span class="o">=</span> <span class="n">obversion</span> <span class="nb">super</span><span class="p">(</span><span class="n">Index</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">_g_post_init_hook</span><span class="p">()</span> <span class="c"># Index arrays must only be created for new indexes</span> <span class="k">if</span> <span class="ow">not</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">_v_new</span><span class="p">:</span> <span class="n">idxversion</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_version</span> <span class="c"># Set-up some variables from info on disk and return</span> <span class="n">attrs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span> <span class="c"># Coerce NumPy scalars to Python scalars in order</span> <span class="c"># to avoid undesired upcasting operations.</span> <span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">attrs</span><span class="o">.</span><span class="n">superblocksize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">attrs</span><span class="o">.</span><span class="n">blocksize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">attrs</span><span class="o">.</span><span class="n">slicesize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">attrs</span><span class="o">.</span><span class="n">chunksize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksizes</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">blocksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">attrs</span><span class="o">.</span><span class="n">optlevel</span><span class="p">)</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span> <span class="n">indices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indices</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">atom</span><span class="o">.</span><span class="n">dtype</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">atom</span><span class="o">.</span><span class="n">type</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">=</span> <span class="n">indices</span><span class="o">.</span><span class="n">atom</span><span class="o">.</span><span class="n">itemsize</span> <span class="c"># Some sanity checks for slicesize, chunksize and indsize</span> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">==</span> <span class="n">indices</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s">"Wrong slicesize"</span> <span class="k">assert</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">==</span> <span class="n">indices</span><span class="o">.</span><span class="n">_v_chunkshape</span><span class="p">[</span> <span class="mi">1</span><span class="p">],</span> <span class="s">"Wrong chunksize"</span> <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">),</span> <span class="s">"Wrong indices itemsize"</span> <span class="k">if</span> <span class="n">idxversion</span> <span class="o">></span> <span class="s">"2.0"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">attrs</span><span class="o">.</span><span class="n">reduction</span><span class="p">)</span> <span class="n">nelementsSLR</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span><span class="p">)</span> <span class="n">nelementsILR</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="o">=</span> <span class="mi">1</span> <span 
class="n">nelementsILR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="n">nelementsSLR</span> <span class="o">=</span> <span class="n">nelementsILR</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">nrows</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">+</span> <span class="n">nelementsILR</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">=</span> <span class="n">nelementsSLR</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">=</span> <span class="n">nelementsILR</span> <span class="k">if</span> <span class="n">nelementsILR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">+=</span> <span class="mi">1</span> <span class="c"># Get the bounds as a cache (this has to remain here!)</span> <span class="n">rchunksize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="n">nboundsLR</span> <span class="o">=</span> <span class="p">(</span><span class="n">nelementsSLR</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span 
class="o">//</span> <span class="n">rchunksize</span> <span class="k">if</span> <span class="n">nboundsLR</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span> <span class="n">nboundsLR</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># correction for -1 bounds</span> <span class="n">nboundsLR</span> <span class="o">+=</span> <span class="mi">2</span> <span class="c"># bounds + begin + end</span> <span class="c"># All bounds values (+begin + end) are at the end of sortedLR</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span><span class="p">[</span><span class="n">nelementsSLR</span><span class="p">:</span><span class="n">nelementsSLR</span> <span class="o">+</span> <span class="n">nboundsLR</span><span class="p">]</span> <span class="k">return</span> <span class="c"># The index is new. Initialize the values</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">=</span> <span class="mi">0</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="mi">0</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">=</span> <span class="mi">0</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># The atom</span> <span class="n">atom</span> <span class="o">=</span> <span class="n">Atom</span><span class="o">.</span><span class="n">from_dtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="c"># The filters</span> <span class="n">filters</span> <span class="o">=</span> <span class="bp">self</span><span 
class="o">.</span><span class="n">filters</span> <span class="c"># Compute the superblocksize, blocksize, slicesize and chunksize values</span> <span class="c"># (in case these parameters haven't been passed to the constructor)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksizes</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksizes</span> <span class="o">=</span> <span class="n">calc_chunksize</span><span class="p">(</span> <span class="bp">self</span><span class="o">.</span><span class="n">expectedrows</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">)</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksizes</span> <span class="k">if</span> <span class="n">debug</span><span class="p">:</span> <span class="k">print</span> <span class="s">"blocksizes:"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksizes</span> <span class="c"># Compute the reduction level</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="o">=</span> <span class="n">get_reduction_level</span><span class="p">(</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">)</span> <span class="n">rchunksize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="n">rslicesize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="c"># Save them on disk as attributes</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">superblocksize</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span 
class="n">uint32</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint32</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">)</span> <span class="c"># Save the optlevel as well</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">optlevel</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span> <span class="c"># Save the reduction level</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">reduction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="c"># Create the IndexArray for sorted values</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">IndexArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'sorted'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="s">"Sorted Values"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="c"># Create the IndexArray for index values</span> <span class="n">IndexArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'indices'</span><span class="p">,</span> <span class="n">UIntAtom</span><span class="p">(</span><span 
class="n">itemsize</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">),</span> <span class="s">"Number of chunk in table"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="c"># Create the cache for range values (1st order cache)</span> <span class="n">CacheArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'ranges'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="s">"Range Values"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">expectedrows</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="c"># median ranges</span> <span class="n">EArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'mranges'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,),</span> <span class="s">"Median ranges"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">,</span> <span class="n">_log</span><span class="o">=</span><span 
class="bp">False</span><span class="p">)</span> <span class="c"># Create the cache for boundary values (2nd order cache)</span> <span class="n">nbounds_inslice</span> <span class="o">=</span> <span class="p">(</span><span class="n">rslicesize</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">//</span> <span class="n">rchunksize</span> <span class="n">CacheArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'bounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nbounds_inslice</span><span class="p">),</span> <span class="s">"Boundary Values"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunks</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">nbounds_inslice</span><span class="p">),</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="c"># begin, end &amp; median bounds (only for numerical types)</span> <span class="n">EArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'abounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,),</span> <span class="s">"Start bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">,</span> <span class="n">_log</span><span class="o">=</span><span 
class="bp">False</span><span class="p">)</span> <span class="n">EArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'zbounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,),</span> <span class="s">"End bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">,</span> <span class="n">_log</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span> <span class="n">EArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'mbounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,),</span> <span class="s">"Median bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">,</span> <span class="n">_log</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span> <span class="c"># Create the Array for last (sorted) row values + bounds</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="n">rslicesize</span> <span class="o">+</span> <span class="mi">2</span> <span class="o">+</span> <span class="n">nbounds_inslice</span><span class="p">,)</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="n">LastRowArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'sortedLR'</span><span class="p">,</span> <span class="n">atom</span><span 
class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Last Row sorted values + bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="p">(</span><span class="n">rchunksize</span><span class="p">,),</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="c"># Create the Array for the number of chunk in last row</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,)</span> <span class="c"># enough for indexes and length</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="n">LastRowArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s">'indicesLR'</span><span class="p">,</span> <span class="n">UIntAtom</span><span class="p">(</span><span class="n">itemsize</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">),</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Last Row indices"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">,),</span> <span class="n">byteorder</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="c"># The number of elements in LR will be initialized here</span> <span class="n">sortedLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="mi">0</span> <span 
class="n">indicesLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># All bounds values (+begin + end) are uninitialized in creation time</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="o">=</span> <span class="bp">None</span> <span class="c"># The starts and lengths initialization</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nrows</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span> <span class="sd">"""Where the values fulfilling conditions start for every slice."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nrows</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span> <span class="sd">"""Lengths of the values fulfilling conditions for every slice."""</span> <span class="c"># Finally, create a temporary file for indexes if needed</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">temp_required</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span
class="n">create_temp</span><span class="p">()</span> <span class="n">_g_postInitHook</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">_g_post_init_hook</span><span class="p">)</span> <span class="k">def</span> <span class="nf">initial_append</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">xarr</span><span class="p">,</span> <span class="n">nrow</span><span class="p">,</span> <span class="n">reduction</span><span class="p">):</span> <span class="sd">"""Compute an initial indices arrays for data to be indexed."""</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">tref</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Entering initial_append"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="n">arr</span> <span class="o">=</span> <span class="n">xarr</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span> <span class="n">indsize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="n">slicesize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nelementsILR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Before creating idx"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">if</span> <span class="n">indsize</span> <span 
class="o">==</span> <span class="mi">8</span><span class="p">:</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">arr</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="s">"uint64"</span><span class="p">)</span> <span class="o">+</span> <span class="n">nrow</span> <span class="o">*</span> <span class="n">slicesize</span> <span class="k">elif</span> <span class="n">indsize</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span> <span class="c"># For medium (32-bit) all the rows in tables should be</span> <span class="c"># directly reachable. But as len(arr) &lt; 2**31, we can</span> <span class="c"># choose uint32 for representing indices. In this way, we</span> <span class="c"># consume far less memory during the keysort process. The</span> <span class="c"># offset will be added in self.final_idx32() later on.</span> <span class="c">#</span> <span class="c"># This optimization also prevents the values in LR to</span> <span class="c"># participate in the ``swap_chunks`` process, and this is</span> <span class="c"># the main reason to not allow the medium indexes to create</span> <span class="c"># completely sorted indexes. However, I don't find this to</span> <span class="c"># be a big limitation, as probably full indexes are much</span> <span class="c"># more suitable for producing completely sorted indexes</span> <span class="c"># because in this case the indices part is usable for</span> <span class="c"># getting the reverse indices of the index, and I foresee</span> <span class="c"># this to be a common requirement in many operations (for</span> <span class="c"># example, in table sorts).</span> <span class="c">#</span> <span class="c"># F.
Alted 2008-09-15</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">arr</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="s">"uint32"</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">arr</span><span class="p">),</span> <span class="s">"uint</span><span class="si">%d</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">indsize</span> <span class="o">*</span> <span class="mi">8</span><span class="p">))</span> <span class="n">lbucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lbucket</span> <span class="c"># Fill the idx with the bucket indices</span> <span class="n">offset</span> <span class="o">=</span> <span class="n">lbucket</span> <span class="o">-</span> <span class="p">((</span><span class="n">nrow</span> <span class="o">*</span> <span class="p">(</span><span class="n">slicesize</span> <span class="o">%</span> <span class="n">lbucket</span><span class="p">))</span> <span class="o">%</span> <span class="n">lbucket</span><span class="p">)</span> <span class="n">idx</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="n">offset</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">offset</span><span class="p">,</span> <span 
class="n">slicesize</span><span class="p">,</span> <span class="n">lbucket</span><span class="p">):</span> <span class="n">idx</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span> <span class="o">+</span> <span class="n">lbucket</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="n">lbucket</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">//</span> <span class="n">lbucket</span> <span class="k">if</span> <span class="n">indsize</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> <span class="c"># Add a second offset in this case</span> <span class="c"># First normalize the number of rows</span> <span class="n">offset2</span> <span class="o">=</span> <span class="p">(</span><span class="n">nrow</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslicesblock</span><span class="p">)</span> <span class="o">*</span> <span class="n">slicesize</span> <span class="o">//</span> <span class="n">lbucket</span> <span class="n">idx</span> <span class="o">+=</span> <span class="n">offset2</span> <span class="c"># Add the last row at the beginning of arr & idx (if needed)</span> <span class="k">if</span> <span class="p">(</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span> <span class="ow">and</span> <span class="n">nelementsILR</span> <span class="o">></span> <span class="mi">0</span><span class="p">):</span> <span class="c"># It is possible that the values in LR are already sorted.</span> <span class="c"># Fetch them and override existing values in arr and idx.</span> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span> <span class="o">></span> <span class="n">nelementsILR</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">read_slice_lr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span><span class="p">,</span> <span class="n">arr</span><span class="p">[:</span><span class="n">nelementsILR</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice_lr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span><span class="p">,</span> <span class="n">idx</span><span class="p">[:</span><span class="n">nelementsILR</span><span class="p">])</span> <span class="c"># In-place sorting</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Before keysort"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="n">indexesextension</span><span class="o">.</span><span class="n">keysort</span><span class="p">(</span><span class="n">arr</span><span class="p">,</span> <span class="n">idx</span><span class="p">)</span> <span class="n">larr</span> <span class="o">=</span> <span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="n">reduction</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> <span class="c"># It's important to do a copy() here in order to ensure that</span> <span class="c"># sorted._append() will receive a contiguous array.</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Before reduction"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="n">reduc</span> <span class="o">=</span> <span class="n">arr</span><span 
class="p">[::</span><span class="n">reduction</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"After reduction"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="n">arr</span> <span class="o">=</span> <span class="n">reduc</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"After arr &lt;-- reduc"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="c"># A completely sorted index is no longer possible after an</span> <span class="c"># append of an index with already one slice.</span> <span class="k">if</span> <span class="n">nrow</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">is_csi</span> <span class="o">=</span> <span class="bp">False</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Exiting initial_append"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">return</span> <span class="n">larr</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">idx</span> <span class="k">def</span> <span class="nf">final_idx32</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">idx</span><span class="p">,</span> <span class="n">offset</span><span class="p">):</span> <span class="sd">"""Perform final operations in 32-bit indices."""</span> <span class="k">if</span> <span
class="n">profile</span><span class="p">:</span> <span class="n">tref</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Entering final_idx32"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="c"># Do an upcast first in order to add the offset.</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">idx</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s">'uint64'</span><span class="p">)</span> <span class="n">idx</span> <span class="o">+=</span> <span class="n">offset</span> <span class="c"># The next partition is valid up to table sizes of</span> <span class="c"># 2**30 * 2**18 = 2**48 bytes, that is, 256 Tera-elements,</span> <span class="c"># which should be a safe figure, at least for a while.</span> <span class="n">idx</span> <span class="o">//=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lbucket</span> <span class="c"># After the division, we can downsize the indexes to 'uint32'</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">idx</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s">'uint32'</span><span class="p">)</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Exiting final_idx32"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">return</span> <span class="n">idx</span> <span class="k">def</span> <span class="nf">append</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">xarr</span><span class="p">,</span> <span class="n">update</span><span 
class="o">=</span><span class="bp">False</span><span class="p">):</span> <span class="sd">"""Append the array to the index objects"""</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">tref</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Entering append"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">update</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">temp_required</span><span class="p">:</span> <span class="n">where</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="c"># The reduction will take place *after* the optimization process</span> <span class="n">reduction</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">else</span><span class="p">:</span> <span class="n">where</span> <span class="o">=</span> <span class="bp">self</span> <span class="n">reduction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">sorted</span> <span class="n">indices</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">indices</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">ranges</span> <span class="n">mranges</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">mranges</span> <span class="n">bounds</span> <span class="o">=</span> 
<span class="n">where</span><span class="o">.</span><span class="n">bounds</span> <span class="n">mbounds</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">mbounds</span> <span class="n">abounds</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">abounds</span> <span class="n">zbounds</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">zbounds</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">indicesLR</span> <span class="n">nrows</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">nrows</span> <span class="c"># before sorted.append()</span> <span class="n">larr</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">initial_append</span><span class="p">(</span><span class="n">xarr</span><span class="p">,</span> <span class="n">nrows</span><span class="p">,</span> <span class="n">reduction</span><span class="p">)</span> <span class="c"># Save the sorted array</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arr</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">size</span><span class="p">))</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">//</span> <span 
class="n">reduction</span> <span class="n">ncs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunkslice</span> <span class="c"># Save ranges &amp; bounds</span> <span class="n">ranges</span><span class="o">.</span><span class="n">append</span><span class="p">([[</span><span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">larr</span><span class="p">]])</span> <span class="n">bounds</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">arr</span><span class="p">[</span><span class="n">cs</span><span class="p">::</span><span class="n">cs</span><span class="p">]])</span> <span class="n">abounds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">::</span><span class="n">cs</span><span class="p">])</span> <span class="n">zbounds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">arr</span><span class="p">[</span><span class="n">cs</span> <span class="o">-</span> <span class="mi">1</span><span class="p">::</span><span class="n">cs</span><span class="p">])</span> <span class="c"># Compute the medians</span> <span class="n">smedian</span> <span class="o">=</span> <span class="n">arr</span><span class="p">[</span><span class="n">cs</span> <span class="o">//</span> <span class="mi">2</span><span class="p">::</span><span class="n">cs</span><span class="p">]</span> <span class="n">mbounds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">smedian</span><span class="p">)</span> <span class="n">mranges</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">smedian</span><span class="p">[</span><span class="n">ncs</span> <span class="o">//</span> <span
class="mi">2</span><span class="p">]])</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Before deleting arr &amp; smedian"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">del</span> <span class="n">arr</span><span class="p">,</span> <span class="n">smedian</span> <span class="c"># delete references</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"After deleting arr &amp; smedian"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="c"># Now that arr is gone, we can upcast the indices and add the offset</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span> <span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">final_idx32</span><span class="p">(</span><span class="n">idx</span><span class="p">,</span> <span class="n">nrows</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">)</span> <span class="n">indices</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">idx</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">idx</span><span class="o">.</span><span class="n">size</span><span class="p">))</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Before deleting idx"</span><span class="p">,</span> <span class="n">tref</span><span
class="p">)</span> <span class="k">del</span> <span class="n">idx</span> <span class="c"># Update counters after a successful append</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">=</span> <span class="n">nrows</span> <span class="o">+</span> <span class="mi">1</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># reset the counter of the last row index to 0</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># reset the counter of the last row index to 0</span> <span class="c"># The number of elements will be saved as an attribute.</span> <span class="c"># This is necessary so that the LR arrays can remember their values</span> <span class="c"># after a possible node preemption/reload.</span> <span class="n">sortedLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="n">indicesLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirtycache</span> <span class="o">=</span> <span class="bp">True</span> <span class="c"># the cache is dirty now</span> <span class="k">if</span>
<span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Exiting append"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">def</span> <span class="nf">append_last_row</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">xarr</span><span class="p">,</span> <span class="n">update</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span> <span class="sd">"""Append the array to the last row index objects"""</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">tref</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Entering appendLR"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="c"># compute the elements in the last row sorted & bounds array</span> <span class="n">nrows</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">update</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">temp_required</span><span class="p">:</span> <span class="n">where</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="c"># The reduction will take place *after* the optimization process</span> <span class="n">reduction</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">else</span><span class="p">:</span> <span class="n">where</span> <span class="o">=</span> <span class="bp">self</span> <span class="n">reduction</span> <span class="o">=</span> 
<span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">indicesLR</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">larr</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">initial_append</span><span class="p">(</span><span class="n">xarr</span><span class="p">,</span> <span class="n">nrows</span><span class="p">,</span> <span class="n">reduction</span><span class="p">)</span> <span class="n">nelementsSLR</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span> <span class="n">nelementsILR</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">idx</span><span class="p">)</span> <span class="c"># Build the cache of bounds</span> <span class="n">rchunksize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">//</span> <span class="n">reduction</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">arr</span><span class="p">[::</span><span class="n">rchunksize</span><span class="p">],</span> <span class="p">[</span><span class="n">larr</span><span class="p">]))</span> <span class="c"># The number of elements will be saved as an attribute</span> <span class="n">sortedLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span 
class="n">nelements</span> <span class="o">=</span> <span class="n">nelementsSLR</span> <span class="n">indicesLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="n">nelementsILR</span> <span class="c"># Save the number of elements, bounds and sorted values</span> <span class="c"># at the end of the sorted array</span> <span class="n">offset2</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">)</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">nelementsSLR</span><span class="p">:</span><span class="n">nelementsSLR</span> <span class="o">+</span> <span class="n">offset2</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="n">sortedLR</span><span class="p">[:</span><span class="n">nelementsSLR</span><span class="p">]</span> <span class="o">=</span> <span class="n">arr</span> <span class="k">del</span> <span class="n">arr</span> <span class="c"># Now that arr is gone, we can upcast the indices and add the offset</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span> <span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">final_idx32</span><span class="p">(</span><span class="n">idx</span><span class="p">,</span> <span class="n">nrows</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">)</span> <span class="c"># Save the reverse index array</span> <span class="n">indicesLR</span><span class="p">[:</span><span class="nb">len</span><span 
class="p">(</span><span class="n">idx</span><span class="p">)]</span> <span class="o">=</span> <span class="n">idx</span> <span class="k">del</span> <span class="n">idx</span> <span class="c"># Update counters after a successful append</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrows</span> <span class="o">=</span> <span class="n">nrows</span> <span class="o">+</span> <span class="mi">1</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="n">nrows</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="o">+</span> <span class="n">nelementsILR</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">=</span> <span class="n">nelementsILR</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">=</span> <span class="n">nelementsSLR</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirtycache</span> <span class="o">=</span> <span class="bp">True</span> <span class="c"># the cache is dirty now</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Exiting appendLR"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="n">appendLastRow</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">append_last_row</span><span class="p">)</span> <span class="k">def</span> <span class="nf">optimize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span> <span class="sd">"""Optimize an index so as to allow faster searches.</span> <span 
class="sd"> verbose</span> <span class="sd"> If True, messages about the progress of the</span> <span class="sd"> optimization process are printed out.</span> <span class="sd"> """</span> <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">temp_required</span><span class="p">:</span> <span class="k">return</span> <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="bp">True</span> <span class="k">else</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">debug</span> <span class="c"># Initialize last_tover and last_nover</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_tover</span> <span class="o">=</span> <span class="mi">0</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_nover</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Compute the correct optimizations for current optim level</span> <span class="n">opts</span> <span class="o">=</span> <span class="n">calcoptlevels</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nblocks</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">)</span> <span class="n">optmedian</span><span class="p">,</span> <span class="n">optstarts</span><span class="p">,</span> <span class="n">optstops</span><span class="p">,</span> <span class="n">optfull</span> <span class="o">=</span> <span class="n">opts</span> <span class="k">if</span> <span class="n">debug</span><span class="p">:</span> <span 
class="k">print</span> <span class="s">"optvalues:"</span><span class="p">,</span> <span class="n">opts</span> <span class="bp">self</span><span class="o">.</span><span class="n">create_temp2</span><span class="p">()</span> <span class="c"># Start the optimization process</span> <span class="k">while</span> <span class="bp">True</span><span class="p">:</span> <span class="k">if</span> <span class="n">optfull</span><span class="p">:</span> <span class="k">for</span> <span class="n">niter</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">optfull</span><span class="p">):</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'median'</span><span class="p">):</span> <span class="k">break</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nblocks</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span> <span class="c"># Swap slices only in the case that we have</span> <span class="c"># several blocks</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'slices'</span><span class="p">,</span> <span class="s">'median'</span><span class="p">):</span> <span class="k">break</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'median'</span><span class="p">):</span> <span class="k">break</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'start'</span><span class="p">):</span> <span 
class="k">break</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'stop'</span><span class="p">):</span> <span class="k">break</span> <span class="k">else</span><span class="p">:</span> <span class="k">if</span> <span class="n">optmedian</span><span class="p">:</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'median'</span><span class="p">):</span> <span class="k">break</span> <span class="k">if</span> <span class="n">optstarts</span><span class="p">:</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'start'</span><span class="p">):</span> <span class="k">break</span> <span class="k">if</span> <span class="n">optstops</span><span class="p">:</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap</span><span class="p">(</span><span class="s">'chunks'</span><span class="p">,</span> <span class="s">'stop'</span><span class="p">):</span> <span class="k">break</span> <span class="k">break</span> <span class="c"># If we reach this, exit the loop</span> <span class="c"># Check if we require a complete sort. 
Important: this step</span> <span class="c"># should be carried out *after* the optimization process has</span> <span class="c"># been completed (this is to guarantee that the complete sort</span> <span class="c"># does not take too much memory).</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">want_complete_sort</span><span class="p">:</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">noverlaps</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">do_complete_sort</span><span class="p">()</span> <span class="c"># Check that we have effectively achieved the complete sort</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">noverlaps</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> <span class="s">"OPSI was not able to achieve a completely sorted index."</span> <span class="s">" Please report this to the authors."</span><span class="p">,</span> <span class="ne">UserWarning</span><span class="p">)</span> <span class="c"># Close and delete the temporary optimization index file</span> <span class="bp">self</span><span class="o">.</span><span class="n">cleanup_temp</span><span class="p">()</span> <span class="k">return</span> <span class="k">def</span> <span class="nf">do_complete_sort</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""Bring an already optimized index into a complete sorted state."""</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span> <span class="n">t1</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="n">c1</span> <span class="o">=</span> <span class="n">clock</span><span class="p">()</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges</span><span class="p">[:]</span> <span class="n">nslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="n">nelementsLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="k">if</span> <span class="n">nelementsLR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="c"># Add the ranges corresponding to the last row</span> <span class="n">rangeslr</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]])</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">ranges</span><span class="p">,</span> <span class="p">[</span><span class="n">rangeslr</span><span class="p">]))</span> <span class="n">nslices</span> <span class="o">+=</span> <span class="mi">1</span> <span class="nb">sorted</span> <span class="o">=</span> <span 
class="n">tmp</span><span class="o">.</span><span class="n">sorted</span> <span class="n">indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indicesLR</span> <span class="n">sremain</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">iremain</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s">'u</span><span class="si">%d</span><span class="s">'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">)</span> <span class="n">starts</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">nslices</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">int_</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nslices</span><span class="p">):</span> <span class="c"># Find the overlapping elements for slice i</span> <span class="n">sover</span> <span class="o">=</span> <span 
class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">iover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s">'u</span><span class="si">%d</span><span class="s">'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">)</span> <span class="n">prev_end</span> <span class="o">=</span> <span class="n">ranges</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">nslices</span><span class="p">):</span> <span class="n">stj</span> <span class="o">=</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="k">if</span> <span class="p">((</span><span class="n">j</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="ow">and</span> <span class="n">stj</span> <span class="o">==</span> <span class="n">ss</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="n">j</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="ow">and</span> <span class="n">stj</span> <span class="o">==</span> <span class="n">nelementsLR</span><span class="p">)):</span> <span class="c"># This 
slice has already been dealt with</span> <span class="k">continue</span> <span class="k">if</span> <span class="n">j</span> <span class="o">&lt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="k">assert</span> <span class="n">stj</span> <span class="o">&lt;</span> <span class="n">ss</span><span class="p">,</span> \ <span class="s">"Two slices cannot overlap completely at this stage!"</span> <span class="n">next_beg</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="n">stj</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> <span class="k">assert</span> <span class="n">stj</span> <span class="o">&lt;</span> <span class="n">nelementsLR</span><span class="p">,</span> \ <span class="s">"Two slices cannot overlap completely at this stage!"</span> <span class="n">next_beg</span> <span class="o">=</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">stj</span><span class="p">]</span> <span class="n">next_end</span> <span class="o">=</span> <span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="n">prev_end</span> <span class="o">></span> <span class="n">next_end</span><span class="p">:</span> <span class="c"># Complete overlapping case</span> <span class="k">if</span> <span class="n">j</span> <span class="o">&lt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">sover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">sover</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">j</span><span
class="p">,</span> <span class="n">stj</span><span class="p">:]))</span> <span class="n">iover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">iover</span><span class="p">,</span> <span class="n">indices</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="n">stj</span><span class="p">:]))</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">ss</span> <span class="k">else</span><span class="p">:</span> <span class="n">n</span> <span class="o">=</span> <span class="n">nelementsLR</span> <span class="n">sover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">sover</span><span class="p">,</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">stj</span><span class="p">:</span><span class="n">n</span><span class="p">]))</span> <span class="n">iover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">iover</span><span class="p">,</span> <span class="n">indicesLR</span><span class="p">[</span><span class="n">stj</span><span class="p">:</span><span class="n">n</span><span class="p">]))</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">nelementsLR</span> <span class="k">elif</span> <span class="n">prev_end</span> <span class="o">></span> <span class="n">next_beg</span><span class="p">:</span> <span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_item_lt</span><span class="p">(</span><span class="n">tmp</span><span 
class="p">,</span> <span class="n">prev_end</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">],</span> <span class="n">stj</span><span class="p">)</span> <span class="k">if</span> <span class="n">j</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">sover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">sover</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="n">stj</span><span class="p">:</span><span class="n">idx</span><span class="p">]))</span> <span class="n">iover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">iover</span><span class="p">,</span> <span class="n">indices</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="n">stj</span><span class="p">:</span><span class="n">idx</span><span class="p">]))</span> <span class="k">else</span><span class="p">:</span> <span class="n">sover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">sover</span><span class="p">,</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">stj</span><span class="p">:</span><span class="n">idx</span><span class="p">]))</span> <span class="n">iover</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">iover</span><span class="p">,</span> <span class="n">indicesLR</span><span 
class="p">[</span><span class="n">stj</span><span class="p">:</span><span class="n">idx</span><span class="p">]))</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">idx</span> <span class="c"># Build the extended slices to sort out</span> <span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">ssorted</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span> <span class="p">(</span><span class="n">sremain</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">starts</span><span class="p">[</span><span class="n">i</span><span class="p">]:],</span> <span class="n">sover</span><span class="p">))</span> <span class="n">sindices</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span> <span class="p">(</span><span class="n">iremain</span><span class="p">,</span> <span class="n">indices</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">starts</span><span class="p">[</span><span class="n">i</span><span class="p">]:],</span> <span class="n">iover</span><span class="p">))</span> <span class="k">else</span><span class="p">:</span> <span class="n">ssorted</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span> <span class="p">(</span><span class="n">sremain</span><span class="p">,</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">starts</span><span class="p">[</span><span class="n">i</span><span class="p">]:</span><span 
class="n">nelementsLR</span><span class="p">],</span> <span class="n">sover</span><span class="p">))</span> <span class="n">sindices</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span> <span class="p">(</span><span class="n">iremain</span><span class="p">,</span> <span class="n">indicesLR</span><span class="p">[</span><span class="n">starts</span><span class="p">[</span><span class="n">i</span><span class="p">]:</span><span class="n">nelementsLR</span><span class="p">],</span> <span class="n">iover</span><span class="p">))</span> <span class="c"># Sort the extended slices</span> <span class="n">indexesextension</span><span class="o">.</span><span class="n">keysort</span><span class="p">(</span><span class="n">ssorted</span><span class="p">,</span> <span class="n">sindices</span><span class="p">)</span> <span class="c"># Save the first elements of extended slices in the slice i</span> <span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">]</span> <span class="n">indices</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">]</span> <span class="c"># Update caches for this slice</span> <span class="bp">self</span><span class="o">.</span><span class="n">update_caches</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="c"># Save the remaining 
values in a separate array</span> <span class="n">send</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">sover</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">sremain</span><span class="p">)</span> <span class="n">sremain</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[</span><span class="n">ss</span><span class="p">:</span><span class="n">ss</span> <span class="o">+</span> <span class="n">send</span><span class="p">]</span> <span class="n">iremain</span> <span class="o">=</span> <span class="n">sindices</span><span class="p">[</span><span class="n">ss</span><span class="p">:</span><span class="n">ss</span> <span class="o">+</span> <span class="n">send</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> <span class="c"># Still some elements remain for the last row</span> <span class="n">n</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">ssorted</span><span class="p">)</span> <span class="k">assert</span> <span class="n">n</span> <span class="o">==</span> <span class="n">nelementsLR</span> <span class="n">send</span> <span class="o">=</span> <span class="mi">0</span> <span class="n">sortedLR</span><span class="p">[:</span><span class="n">n</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span> <span class="n">indicesLR</span><span class="p">[:</span><span class="n">n</span><span class="p">]</span> <span class="o">=</span> <span class="n">sindices</span> <span class="c"># Update the caches for last row</span> <span class="n">sortedlr</span> <span class="o">=</span> <span class="n">sortedLR</span><span class="p">[:</span><span class="n">nelementsLR</span><span class="p">]</span> <span class="n">bebounds</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span 
class="n">concatenate</span><span class="p">(</span> <span class="p">(</span><span class="n">sortedlr</span><span class="p">[::</span><span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">],</span> <span class="p">[</span><span class="n">sortedlr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]))</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">nelementsLR</span><span class="p">:</span><span class="n">nelementsLR</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">bebounds</span><span class="p">)]</span> <span class="o">=</span> <span class="n">bebounds</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="o">=</span> <span class="n">bebounds</span> <span class="c"># Verify that we have dealt with all the remaining values</span> <span class="k">assert</span> <span class="n">send</span> <span class="o">==</span> <span class="mi">0</span> <span class="c"># Compute the overlaps in order to verify that we have achieved</span> <span class="c"># a complete sort. 
This has to be executed always (and not only</span> <span class="c"># in verbose mode!).</span> <span class="bp">self</span><span class="o">.</span><span class="n">compute_overlaps</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmp</span><span class="p">,</span> <span class="s">"do_complete_sort()"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span> <span class="n">t</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="n">c</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">clock</span><span class="p">()</span> <span class="o">-</span> <span class="n">c1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="k">print</span> <span class="s">"time: </span><span class="si">%s</span><span class="s">. 
clock: </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span> <span class="k">def</span> <span class="nf">swap</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">what</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span> <span class="sd">"""Swap chunks or slices using a certain bounds reference."""</span> <span class="c"># Thresholds for avoiding continuing the optimization</span> <span class="c"># thnover = 4 * self.slicesize # minimum number of overlapping</span> <span class="c"># # elements</span> <span class="n">thnover</span> <span class="o">=</span> <span class="mi">40</span> <span class="n">thmult</span> <span class="o">=</span> <span class="mf">0.1</span> <span class="c"># minimum ratio of multiplicity (a 10%)</span> <span class="n">thtover</span> <span class="o">=</span> <span class="mf">0.01</span> <span class="c"># minimum overlapping index for slices (a 1%)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span> <span class="n">t1</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="n">c1</span> <span class="o">=</span> <span class="n">clock</span><span class="p">()</span> <span class="k">if</span> <span class="n">what</span> <span class="o">==</span> <span class="s">"chunks"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">swap_chunks</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span> <span class="k">elif</span> <span class="n">what</span> <span class="o">==</span> <span class="s">"slices"</span><span class="p">:</span> <span class="bp">self</span><span
class="o">.</span><span class="n">swap_slices</span><span class="p">(</span><span class="n">mode</span><span class="p">)</span> <span class="k">if</span> <span class="n">mode</span><span class="p">:</span> <span class="n">message</span> <span class="o">=</span> <span class="s">"swap_</span><span class="si">%s</span><span class="s">(</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="p">(</span><span class="n">what</span><span class="p">,</span> <span class="n">mode</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">message</span> <span class="o">=</span> <span class="s">"swap_</span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">what</span><span class="p">,)</span> <span class="p">(</span><span class="n">nover</span><span class="p">,</span> <span class="n">mult</span><span class="p">,</span> <span class="n">tover</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">compute_overlaps</span><span class="p">(</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">)</span> <span class="n">rmult</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">mult</span><span class="o">.</span><span class="n">nonzero</span><span class="p">()[</span><span class="mi">0</span><span class="p">])</span> <span class="o">/</span> <span class="nb">float</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">mult</span><span class="p">))</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span 
class="p">:</span> <span class="n">t</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">t1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="n">c</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">clock</span><span class="p">()</span> <span class="o">-</span> <span class="n">c1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> <span class="k">print</span> <span class="s">"time: </span><span class="si">%s</span><span class="s">. clock: </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span> <span class="c"># Check that entropy is actually decreasing</span> <span class="k">if</span> <span class="n">what</span> <span class="o">==</span> <span class="s">"chunks"</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_tover</span> <span class="o">></span> <span class="mf">0.</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_nover</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">tover_var</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">last_tover</span> <span class="o">-</span> <span class="n">tover</span><span class="p">)</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_tover</span> <span class="n">nover_var</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">last_nover</span> <span class="o">-</span> <span 
class="n">nover</span><span class="p">)</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_nover</span> <span class="k">if</span> <span class="n">tover_var</span> <span class="o">&lt;</span> <span class="mf">0.05</span> <span class="ow">and</span> <span class="n">nover_var</span> <span class="o">&lt;</span> <span class="mf">0.05</span><span class="p">:</span> <span class="c"># Less than 5% of improvement is too little</span> <span class="k">return</span> <span class="bp">True</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_tover</span> <span class="o">=</span> <span class="n">tover</span> <span class="bp">self</span><span class="o">.</span><span class="n">last_nover</span> <span class="o">=</span> <span class="n">nover</span> <span class="c"># Check if some threshold has been met</span> <span class="k">if</span> <span class="n">nover</span> <span class="o">&lt;</span> <span class="n">thnover</span><span class="p">:</span> <span class="k">return</span> <span class="bp">True</span> <span class="k">if</span> <span class="n">rmult</span> <span class="o">&lt;</span> <span class="n">thmult</span><span class="p">:</span> <span class="k">return</span> <span class="bp">True</span> <span class="c"># Additional check for the overlap ratio</span> <span class="k">if</span> <span class="n">tover</span> <span class="o">&gt;=</span> <span class="mf">0.</span> <span class="ow">and</span> <span class="n">tover</span> <span class="o">&lt;</span> <span class="n">thtover</span><span class="p">:</span> <span class="k">return</span> <span class="bp">True</span> <span class="k">return</span> <span class="bp">False</span> <span class="k">def</span> <span class="nf">create_temp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""Create some temporary objects for slice sorting purposes."""</span> <span class="c"># The index will be dirty during the index optimization
process</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirty</span> <span class="o">=</span> <span class="bp">True</span> <span class="c"># Build the name of the temporary file</span> <span class="n">fd</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpfilename</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkstemp</span><span class="p">(</span> <span class="s">".tmp"</span><span class="p">,</span> <span class="s">"pytables-"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span><span class="p">)</span> <span class="c"># Close the file descriptor so as to avoid leaks</span> <span class="n">os</span><span class="o">.</span><span class="n">close</span><span class="p">(</span><span class="n">fd</span><span class="p">)</span> <span class="c"># Create the proper PyTables file</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpfile</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_openFile</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpfilename</span><span class="p">,</span> <span class="s">"w"</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="o">=</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpfile</span><span class="o">.</span><span class="n">root</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">filters</span> <span class="o">=</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">filters</span> <span class="c"># temporary sorted &amp; indices arrays</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ss</span><span class="p">)</span> <span class="n">atom</span> <span class="o">=</span> <span class="n">Atom</span><span class="o">.</span><span class="n">from_dtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'sorted'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temporary sorted"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">cs</span><span class="p">))</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'indices'</span><span class="p">,</span> <span class="n">UIntAtom</span><span class="p">(</span><span class="n">itemsize</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">),</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temporary indices"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">cs</span><span class="p">))</span> <span class="c"># temporary bounds</span> <span class="n">nbounds_inslice</span> <span class="o">=</span> <span
class="p">(</span><span class="n">ss</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">//</span> <span class="n">cs</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">nbounds_inslice</span><span class="p">)</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'bounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp chunk bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,</span> <span class="n">nbounds_inslice</span><span class="p">))</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">0</span><span class="p">,)</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'abounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp start bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'zbounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp end bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span 
class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'mbounds'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Median bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="c"># temporary ranges</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'ranges'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="s">"Temporary range values"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span> <span class="n">EArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'mranges'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,),</span> <span class="s">"Median ranges"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="c"># temporary last row (sorted)</span> <span class="n">shape</span> <span class="o">=</span> <span 
class="p">(</span><span class="n">ss</span> <span class="o">+</span> <span class="mi">2</span> <span class="o">+</span> <span class="n">nbounds_inslice</span><span class="p">,)</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'sortedLR'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp Last Row sorted values + bounds"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="c"># temporary last row (indices)</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="n">ss</span><span class="p">,)</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'indicesLR'</span><span class="p">,</span> <span class="n">UIntAtom</span><span class="p">(</span><span class="n">itemsize</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">),</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp Last Row indices"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="k">def</span> <span class="nf">create_temp2</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""Create some temporary objects for slice sorting purposes."""</span> <span class="c"># The algorithms for doing the swap can be optimized so that</span> <span class="c"># one should be necessary to create temporaries for keeping 
just</span> <span class="c"># the contents of a single superblock.</span> <span class="c"># F. Alted 2007-01-03</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">filters</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">filters</span> <span class="c"># temporary sorted &amp; indices arrays</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">,</span> <span class="n">ss</span><span class="p">)</span> <span class="n">atom</span> <span class="o">=</span> <span class="n">Atom</span><span class="o">.</span><span class="n">from_dtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'sorted2'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temporary sorted 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">cs</span><span class="p">))</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'indices2'</span><span class="p">,</span> <span class="n">UIntAtom</span><span
class="p">(</span><span class="n">itemsize</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">),</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temporary indices 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">cs</span><span class="p">))</span> <span class="c"># temporary bounds</span> <span class="n">nbounds_inslice</span> <span class="o">=</span> <span class="p">(</span><span class="n">ss</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">//</span> <span class="n">cs</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">,</span> <span class="n">nbounds_inslice</span><span class="p">)</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'bounds2'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp chunk bounds 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,</span> <span class="n">nbounds_inslice</span><span class="p">))</span> <span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nchunks</span><span class="p">,)</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'abounds2'</span><span 
class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp start bounds 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'zbounds2'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Temp end bounds 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'mbounds2'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="n">shape</span><span class="p">,</span> <span class="s">"Median bounds 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="c"># temporary ranges</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'ranges2'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="s">"Temporary range values 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span 
class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span> <span class="n">CArray</span><span class="p">(</span><span class="n">tmp</span><span class="p">,</span> <span class="s">'mranges2'</span><span class="p">,</span> <span class="n">atom</span><span class="p">,</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">,),</span> <span class="s">"Median ranges 2"</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="n">chunkshape</span><span class="o">=</span><span class="p">(</span><span class="n">cs</span><span class="p">,))</span> <span class="k">def</span> <span class="nf">cleanup_temp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""Copy the data and delete the temporaries for sorting purposes."""</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span> <span class="k">print</span> <span class="s">"Copying temporary data..."</span> <span class="c"># tmp -> index</span> <span class="n">reduction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">//</span> <span class="n">reduction</span> <span class="n">ncs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunkslice</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span 
class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">):</span> <span class="c"># Copy sorted &amp; indices slices</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted</span><span class="p">[</span><span class="n">i</span><span class="p">][::</span><span class="n">reduction</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">sorted</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">size</span><span class="p">))</span> <span class="c"># Compute ranges</span> <span class="bp">self</span><span class="o">.</span><span class="n">ranges</span><span class="o">.</span><span class="n">append</span><span class="p">([[</span><span class="nb">sorted</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">sorted</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]])</span> <span class="c"># Compute chunk bounds</span> <span class="bp">self</span><span class="o">.</span><span class="n">bounds</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="nb">sorted</span><span class="p">[</span><span class="n">cs</span><span class="p">::</span><span class="n">cs</span><span class="p">]])</span> <span class="c"># Compute start, stop &amp; median bounds and ranges</span> <span class="bp">self</span><span class="o">.</span><span class="n">abounds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span
class="nb">sorted</span><span class="p">[</span><span class="mi">0</span><span class="p">::</span><span class="n">cs</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">zbounds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">sorted</span><span class="p">[</span><span class="n">cs</span> <span class="o">-</span> <span class="mi">1</span><span class="p">::</span><span class="n">cs</span><span class="p">])</span> <span class="n">smedian</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">cs</span> <span class="o">//</span> <span class="mi">2</span><span class="p">::</span><span class="n">cs</span><span class="p">]</span> <span class="bp">self</span><span class="o">.</span><span class="n">mbounds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">smedian</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">mranges</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">smedian</span><span class="p">[</span><span class="n">ncs</span> <span class="o">//</span> <span class="mi">2</span><span class="p">]])</span> <span class="k">del</span> <span class="nb">sorted</span><span class="p">,</span> <span class="n">smedian</span> <span class="c"># delete references</span> <span class="c"># Now that sorted is gone, we can copy the indices</span> <span class="n">indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="bp">self</span><span class="o">.</span><span class="n">indices</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">indices</span><span class="o">.</span><span class="n">reshape</span><span 
class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">indices</span><span class="o">.</span><span class="n">size</span><span class="p">))</span> <span class="c"># Now it is the last row turn (if needed)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="c"># First, the sorted values</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span> <span class="n">nelementsLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="n">sortedlr</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sortedLR</span><span class="p">[:</span><span class="n">nelementsLR</span><span class="p">][::</span><span class="n">reduction</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="n">nelementsSLR</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">sortedlr</span><span class="p">)</span> <span class="n">sortedLR</span><span class="p">[:</span><span class="n">nelementsSLR</span><span class="p">]</span> <span class="o">=</span> <span class="n">sortedlr</span> <span class="c"># Now, the bounds</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">sortedlr</span><span class="p">[::</span><span class="n">cs</span><span class="p">],</span> <span 
class="p">[</span><span class="n">sortedlr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]))</span> <span class="n">offset2</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">)</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">nelementsSLR</span><span class="p">:</span><span class="n">nelementsSLR</span> <span class="o">+</span> <span class="n">offset2</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="c"># Finally, the indices</span> <span class="n">indicesLR</span><span class="p">[:]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indicesLR</span><span class="p">[:]</span> <span class="c"># Update the number of (reduced) sorted elements</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">=</span> <span class="n">nelementsSLR</span> <span class="c"># The number of elements will be saved as an attribute</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span> <span 
class="k">print</span> <span class="s">"Deleting temporaries..."</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="o">=</span> <span class="bp">None</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpfile</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> <span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpfilename</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpfilename</span> <span class="o">=</span> <span class="bp">None</span> <span class="c"># The optimization process has finished, and the index is ok now</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirty</span> <span class="o">=</span> <span class="bp">False</span> <span class="c"># ...but the memory data cache is dirty now</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirtycache</span> <span class="o">=</span> <span class="bp">True</span> <span class="k">def</span> <span class="nf">get_neworder</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">neworder</span><span class="p">,</span> <span class="n">src_disk</span><span class="p">,</span> <span class="n">tmp_disk</span><span class="p">,</span> <span class="n">lastrow</span><span class="p">,</span> <span class="n">nslices</span><span class="p">,</span> <span class="n">offset</span><span class="p">,</span> <span class="n">dtype</span><span class="p">):</span> <span class="sd">"""Get sorted &amp; indices values in new order."""</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="n">ncs</span> <span class="o">=</span> <span class="n">ncs2</span> <span class="o">=</span> <span
class="bp">self</span><span class="o">.</span><span class="n">nchunkslice</span> <span class="n">self_nslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="n">tmp</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nslices</span><span class="p">):</span> <span class="n">ns</span> <span class="o">=</span> <span class="n">offset</span> <span class="o">+</span> <span class="n">i</span> <span class="k">if</span> <span class="n">ns</span> <span class="o">==</span> <span class="n">self_nslices</span><span class="p">:</span> <span class="c"># The number of complete chunks in the last row</span> <span class="n">ncs2</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">//</span> <span class="n">cs</span> <span class="c"># Get slices in new order</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">ncs2</span><span class="p">):</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">neworder</span><span class="p">[</span><span class="n">i</span> <span class="o">*</span> <span class="n">ncs</span> <span class="o">+</span> <span class="n">j</span><span class="p">]</span> <span class="n">ins</span> <span class="o">=</span> <span class="n">idx</span> <span class="o">//</span> <span class="n">ncs</span> <span 
class="n">inc</span> <span class="o">=</span> <span class="p">(</span><span class="n">idx</span> <span class="o">-</span> <span class="n">ins</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">)</span> <span class="o">*</span> <span class="n">cs</span> <span class="n">ins</span> <span class="o">+=</span> <span class="n">offset</span> <span class="n">nc</span> <span class="o">=</span> <span class="n">j</span> <span class="o">*</span> <span class="n">cs</span> <span class="k">if</span> <span class="n">ins</span> <span class="o">==</span> <span class="n">self_nslices</span><span class="p">:</span> <span class="n">tmp</span><span class="p">[</span><span class="n">nc</span><span class="p">:</span><span class="n">nc</span> <span class="o">+</span> <span class="n">cs</span><span class="p">]</span> <span class="o">=</span> <span class="n">lastrow</span><span class="p">[</span><span class="n">inc</span><span class="p">:</span><span class="n">inc</span> <span class="o">+</span> <span class="n">cs</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> <span class="n">tmp</span><span class="p">[</span><span class="n">nc</span><span class="p">:</span><span class="n">nc</span> <span class="o">+</span> <span class="n">cs</span><span class="p">]</span> <span class="o">=</span> <span class="n">src_disk</span><span class="p">[</span><span class="n">ins</span><span class="p">,</span> <span class="n">inc</span><span class="p">:</span><span class="n">inc</span> <span class="o">+</span> <span class="n">cs</span><span class="p">]</span> <span class="k">if</span> <span class="n">ns</span> <span class="o">==</span> <span class="n">self_nslices</span><span class="p">:</span> <span class="c"># The number of complete chunks in the last row</span> <span class="n">lastrow</span><span class="p">[:</span><span class="n">ncs2</span> <span class="o">*</span> <span class="n">cs</span><span class="p">]</span> <span class="o">=</span> <span 
class="n">tmp</span><span class="p">[:</span><span class="n">ncs2</span> <span class="o">*</span> <span class="n">cs</span><span class="p">]</span> <span class="c"># The elements in the last chunk of the last row will</span> <span class="c"># participate in the global reordering later on, during</span> <span class="c"># the phase of sorting of *two* slices at a time</span> <span class="c"># (including the last row slice, see</span> <span class="c"># self.reorder_slices()). The caches for last row will</span> <span class="c"># be updated in self.reorder_slices() too.</span> <span class="c"># F. Altet 2008-08-25</span> <span class="k">else</span><span class="p">:</span> <span class="n">tmp_disk</span><span class="p">[</span><span class="n">ns</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span> <span class="k">def</span> <span class="nf">swap_chunks</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s">"median"</span><span class="p">):</span> <span class="sd">"""Swap & reorder the different chunks in a block."""</span> <span class="n">boundsnames</span> <span class="o">=</span> <span class="p">{</span> <span class="s">'start'</span><span class="p">:</span> <span class="s">'abounds'</span><span class="p">,</span> <span class="s">'stop'</span><span class="p">:</span> <span class="s">'zbounds'</span><span class="p">,</span> <span class="s">'median'</span><span class="p">:</span> <span class="s">'mbounds'</span><span class="p">}</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted</span> <span class="n">indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices</span> <span 
class="n">tmp_sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted2</span> <span class="n">tmp_indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices2</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indicesLR</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="n">ncs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunkslice</span> <span class="n">nsb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslicesblock</span> <span class="n">ncb</span> <span class="o">=</span> <span class="n">ncs</span> <span class="o">*</span> <span class="n">nsb</span> <span class="n">ncb2</span> <span class="o">=</span> <span class="n">ncb</span> <span class="n">boundsobj</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">_f_get_child</span><span class="p">(</span><span class="n">boundsnames</span><span class="p">[</span><span class="n">mode</span><span class="p">])</span> <span class="n">can_cross_bbounds</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> <span class="k">for</span> <span class="n">nblock</span> <span class="ow">in</span> <span class="nb">xrange</span><span 
class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nblocks</span><span class="p">):</span> <span class="c"># Protection for last block having fewer chunks than ncb</span> <span class="n">remainingchunks</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunks</span> <span class="o">-</span> <span class="n">nblock</span> <span class="o">*</span> <span class="n">ncb</span> <span class="k">if</span> <span class="n">remainingchunks</span> <span class="o">&lt;</span> <span class="n">ncb</span><span class="p">:</span> <span class="n">ncb2</span> <span class="o">=</span> <span class="n">remainingchunks</span> <span class="k">if</span> <span class="n">ncb2</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">:</span> <span class="c"># if only zero or one chunk remains we are done</span> <span class="k">break</span> <span class="n">nslices</span> <span class="o">=</span> <span class="n">ncb2</span> <span class="o">//</span> <span class="n">ncs</span> <span class="n">bounds</span> <span class="o">=</span> <span class="n">boundsobj</span><span class="p">[</span><span class="n">nblock</span> <span class="o">*</span> <span class="n">ncb</span><span class="p">:</span><span class="n">nblock</span> <span class="o">*</span> <span class="n">ncb</span> <span class="o">+</span> <span class="n">ncb2</span><span class="p">]</span> <span class="c"># Do this only if lastrow elements can cross block boundaries</span> <span class="k">if</span> <span class="p">(</span><span class="n">nblock</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">nblocks</span> <span class="o">-</span> <span class="mi">1</span> <span class="ow">and</span> <span class="c"># last block</span> <span class="n">can_cross_bbounds</span><span class="p">):</span> <span class="n">nslices</span> <span class="o">+=</span> <span class="mi">1</span> <span class="n">ul</span> 
<span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">//</span> <span class="n">cs</span> <span class="n">bounds</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">bounds</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[:</span><span class="n">ul</span><span class="p">]))</span> <span class="n">sbounds_idx</span> <span class="o">=</span> <span class="n">bounds</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">kind</span><span class="o">=</span><span class="n">defsort</span><span class="p">)</span> <span class="n">offset</span> <span class="o">=</span> <span class="n">nblock</span> <span class="o">*</span> <span class="n">nsb</span> <span class="c"># Swap sorted and indices following the new order</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_neworder</span><span class="p">(</span><span class="n">sbounds_idx</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">,</span> <span class="n">tmp_sorted</span><span class="p">,</span> <span class="n">sortedLR</span><span class="p">,</span> <span class="n">nslices</span><span class="p">,</span> <span class="n">offset</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_neworder</span><span class="p">(</span><span class="n">sbounds_idx</span><span class="p">,</span> <span class="n">indices</span><span class="p">,</span> <span class="n">tmp_indices</span><span class="p">,</span> <span class="n">indicesLR</span><span class="p">,</span> <span class="n">nslices</span><span class="p">,</span> <span 
class="n">offset</span><span class="p">,</span> <span class="s">'u</span><span class="si">%d</span><span class="s">'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">)</span> <span class="c"># Reorder completely the index at slice level</span> <span class="bp">self</span><span class="o">.</span><span class="n">reorder_slices</span><span class="p">(</span><span class="n">tmp</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span> <span class="k">def</span> <span class="nf">read_slice</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> <span class="sd">"""Read a slice from the `where` dataset and put it in `buffer`."""</span> <span class="c"># Create the buffers for specifying the coordinates</span> <span class="bp">self</span><span class="o">.</span><span class="n">startl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">nslice</span><span class="p">,</span> <span class="n">start</span><span class="p">],</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">stopl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">nslice</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">start</span> <span class="o">+</span> <span class="nb">buffer</span><span class="o">.</span><span class="n">size</span><span 
class="p">],</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">stepl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">where</span><span class="o">.</span><span class="n">_g_read_slice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">startl</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">stopl</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">stepl</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">)</span> <span class="k">def</span> <span class="nf">write_slice</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> <span class="sd">"""Write a `slice` to the `where` dataset with the `buffer` data."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">startl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">nslice</span><span class="p">,</span> <span class="n">start</span><span class="p">],</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span 
class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">stopl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">nslice</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">start</span> <span class="o">+</span> <span class="nb">buffer</span><span class="o">.</span><span class="n">size</span><span class="p">],</span> <span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">stepl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">countl</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">stopl</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">startl</span> <span class="c"># (1, self.slicesize)</span> <span class="n">where</span><span class="o">.</span><span class="n">_g_write_slice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">startl</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">stepl</span><span class="p">,</span> <span class="n">countl</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">)</span> <span class="c"># Read version for LastRow</span> <span class="k">def</span> <span class="nf">read_slice_lr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span 
class="n">where</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> <span class="sd">"""Read a slice from the `where` dataset and put it in `buffer`."""</span> <span class="n">startl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">start</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">stopl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">start</span> <span class="o">+</span> <span class="nb">buffer</span><span class="o">.</span><span class="n">size</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">stepl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">where</span><span class="o">.</span><span class="n">_g_read_slice</span><span class="p">(</span><span class="n">startl</span><span class="p">,</span> <span class="n">stopl</span><span class="p">,</span> <span class="n">stepl</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">)</span> <span class="c"># Write version for LastRow</span> <span class="k">def</span> <span class="nf">write_sliceLR</span><span class="p">(</span><span 
class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> <span class="sd">"""Write a slice to the `where` dataset with the `buffer` data."""</span> <span class="n">startl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">start</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">countl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">start</span> <span class="o">+</span> <span class="nb">buffer</span><span class="o">.</span><span class="n">size</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">stepl</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">uint64</span><span class="p">)</span> <span class="n">where</span><span class="o">.</span><span class="n">_g_write_slice</span><span class="p">(</span><span class="n">startl</span><span class="p">,</span> <span class="n">stepl</span><span class="p">,</span> <span class="n">countl</span><span class="p">,</span> <span class="nb">buffer</span><span class="p">)</span> <span class="n">read_sliceLR</span> <span class="o">=</span> <span 
class="n">previous_api</span><span class="p">(</span><span class="n">read_slice_lr</span><span class="p">)</span> <span class="k">def</span> <span class="nf">reorder_slice</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">,</span> <span class="n">indices</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">,</span> <span class="n">sindices</span><span class="p">,</span> <span class="n">tmp_sorted</span><span class="p">,</span> <span class="n">tmp_indices</span><span class="p">):</span> <span class="sd">"""Copy & reorder the slice in source to final destination."""</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="c"># Load the second part in buffers</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span><span class="n">tmp_sorted</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[</span><span class="n">ss</span><span class="p">:])</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span><span class="n">tmp_indices</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">sindices</span><span class="p">[</span><span class="n">ss</span><span class="p">:])</span> <span class="n">indexesextension</span><span class="o">.</span><span class="n">keysort</span><span class="p">(</span><span class="n">ssorted</span><span class="p">,</span> <span class="n">sindices</span><span class="p">)</span> <span class="c"># Write the first part of the buffers to the regular leaves</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_slice</span><span 
class="p">(</span><span class="nb">sorted</span><span class="p">,</span> <span class="n">nslice</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_slice</span><span class="p">(</span><span class="n">indices</span><span class="p">,</span> <span class="n">nslice</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="c"># Update caches</span> <span class="bp">self</span><span class="o">.</span><span class="n">update_caches</span><span class="p">(</span><span class="n">nslice</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="c"># Shift the slice in the end to the beginning</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[</span><span class="n">ss</span><span class="p">:]</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">]</span> <span class="o">=</span> <span class="n">sindices</span><span class="p">[</span><span class="n">ss</span><span class="p">:]</span> <span class="k">def</span> <span class="nf">update_caches</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">):</span> <span class="sd">"""Update the caches for faster lookups."""</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">chunksize</span> <span class="n">ncs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunkslice</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="c"># update first & second cache bounds (ranges & bounds)</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges</span><span class="p">[</span><span class="n">nslice</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">]]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">bounds</span><span class="p">[</span><span class="n">nslice</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[</span><span class="n">cs</span><span class="p">::</span><span class="n">cs</span><span class="p">]</span> <span class="c"># update start & stop bounds</span> <span class="n">tmp</span><span class="o">.</span><span class="n">abounds</span><span class="p">[</span><span class="n">nslice</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">:(</span><span class="n">nslice</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[</span><span class="mi">0</span><span class="p">::</span><span class="n">cs</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">zbounds</span><span class="p">[</span><span class="n">nslice</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">:(</span><span class="n">nslice</span> <span class="o">+</span> <span class="mi">1</span><span 
class="p">)</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">]</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[</span><span class="n">cs</span> <span class="o">-</span> <span class="mi">1</span><span class="p">::</span><span class="n">cs</span><span class="p">]</span> <span class="c"># update median bounds</span> <span class="n">smedian</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[</span><span class="n">cs</span> <span class="o">//</span> <span class="mi">2</span><span class="p">::</span><span class="n">cs</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mbounds</span><span class="p">[</span><span class="n">nslice</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">:(</span><span class="n">nslice</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">ncs</span><span class="p">]</span> <span class="o">=</span> <span class="n">smedian</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mranges</span><span class="p">[</span><span class="n">nslice</span><span class="p">]</span> <span class="o">=</span> <span class="n">smedian</span><span class="p">[</span><span class="n">ncs</span> <span class="o">//</span> <span class="mi">2</span><span class="p">]</span> <span class="k">def</span> <span class="nf">reorder_slices</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tmp</span><span class="p">):</span> <span class="sd">"""Reorder completely the index at slice level.</span> <span class="sd"> This method has to maintain the locality of elements in the</span> <span class="sd"> ambit of ``blocks``, i.e. an element of a ``block`` cannot be</span> <span class="sd"> sent to another ``block`` during this reordering. 
This is</span> <span class="sd"> *critical* for ``light`` indexes to be able to use this.</span> <span class="sd"> This version of reorder_slices is optimized in that *two*</span> <span class="sd"> complete slices are taken at a time (including the last row</span> <span class="sd"> slice) so as to sort them. Then, each new slice that is read is</span> <span class="sd"> put at the end of this two-slice buffer, while the previous one</span> <span class="sd"> is moved to the beginning of the buffer. This is in order to</span> <span class="sd"> better reduce the entropy of the regular part (i.e. all except</span> <span class="sd"> the last row) of the index.</span> <span class="sd"> A secondary effect of this is that it takes at least *twice* the</span> <span class="sd"> memory of a previous version of reorder_slices() that only</span> <span class="sd"> reorders on a slice-by-slice basis. However, as this is more</span> <span class="sd"> efficient than the old version, one can configure the slicesize</span> <span class="sd"> to be smaller, so the memory consumption is roughly similar.</span> <span class="sd"> """</span> <span class="n">tmp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted</span> <span class="n">indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices</span> <span class="k">if</span> <span class="n">tmp</span><span class="p">:</span> <span class="n">tmp_sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted2</span> <span class="n">tmp_indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices2</span> <span class="k">else</span><span class="p">:</span> <span class="n">tmp_sorted</span> <span 
class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted</span> <span class="n">tmp_indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nsb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="n">nblocks</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nblocks</span> <span class="n">nelementsLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="c"># Create the buffer for reordering 2 slices at a time</span> <span class="n">ssorted</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">ss</span> <span class="o">*</span> <span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="n">sindices</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">ss</span> <span class="o">*</span> 
<span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">dtype</span><span class="p">(</span><span class="s">'u</span><span class="si">%d</span><span class="s">'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">))</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span><span class="p">:</span> <span class="c"># Bootstrap the process for reordering</span> <span class="c"># Read the first slice in buffers</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span><span class="n">tmp_sorted</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span><span class="n">tmp_indices</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="n">nslice</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Just in case the loop below executes nothing</span> <span class="c"># Loop over the remaining slices in block</span> <span class="k">for</span> <span class="n">nslice</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">nrows</span><span class="p">):</span> <span class="bp">self</span><span class="o">.</span><span class="n">reorder_slice</span><span class="p">(</span><span 
class="n">nslice</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">,</span> <span class="n">indices</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">,</span> <span class="n">sindices</span><span class="p">,</span> <span class="n">tmp_sorted</span><span class="p">,</span> <span class="n">tmp_indices</span><span class="p">)</span> <span class="c"># End the process (enrolling the lastrow if necessary)</span> <span class="k">if</span> <span class="n">nelementsLR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">indicesLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp</span><span class="o">.</span><span class="n">indicesLR</span> <span class="c"># Shrink the ssorted and sindices arrays to the minimum</span> <span class="n">ssorted2</span> <span class="o">=</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span> <span class="o">+</span> <span class="n">nelementsLR</span><span class="p">]</span> <span class="n">sortedlr</span> <span class="o">=</span> <span class="n">ssorted2</span><span class="p">[</span><span class="n">ss</span><span class="p">:]</span> <span class="n">sindices2</span> <span class="o">=</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span> <span class="o">+</span> <span class="n">nelementsLR</span><span class="p">]</span> <span class="n">indiceslr</span> <span class="o">=</span> <span class="n">sindices2</span><span class="p">[</span><span class="n">ss</span><span class="p">:]</span> <span class="c"># Read the last row info in the second part of the buffer</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">read_slice_lr</span><span class="p">(</span><span class="n">sortedLR</span><span class="p">,</span> <span class="n">sortedlr</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice_lr</span><span class="p">(</span><span class="n">indicesLR</span><span class="p">,</span> <span class="n">indiceslr</span><span class="p">)</span> <span class="n">indexesextension</span><span class="o">.</span><span class="n">keysort</span><span class="p">(</span><span class="n">ssorted2</span><span class="p">,</span> <span class="n">sindices2</span><span class="p">)</span> <span class="c"># Write the second part of the buffers to the lastrow indices</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_sliceLR</span><span class="p">(</span><span class="n">sortedLR</span><span class="p">,</span> <span class="n">sortedlr</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_sliceLR</span><span class="p">(</span><span class="n">indicesLR</span><span class="p">,</span> <span class="n">indiceslr</span><span class="p">)</span> <span class="c"># Update the caches for last row</span> <span class="n">bebounds</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">sortedlr</span><span class="p">[::</span><span class="n">cs</span><span class="p">],</span> <span class="p">[</span><span class="n">sortedlr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]]))</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">nelementsLR</span><span class="p">:</span><span class="n">nelementsLR</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">bebounds</span><span class="p">)]</span> <span class="o">=</span> <span class="n">bebounds</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="o">=</span> <span class="n">bebounds</span> <span class="c"># Write the first part of the buffers to the regular leaves</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_slice</span><span class="p">(</span><span class="nb">sorted</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_slice</span><span class="p">(</span><span class="n">indices</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="c"># Update caches for this slice</span> <span class="bp">self</span><span class="o">.</span><span class="n">update_caches</span><span class="p">(</span><span class="n">nslice</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="k">else</span><span class="p">:</span> <span class="c"># Iterate over each block. 
No data should cross block</span> <span class="c"># boundaries to avoid addressing problems with short indices.</span> <span class="k">for</span> <span class="n">nb</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nblocks</span><span class="p">):</span> <span class="c"># Bootstrap the process for reordering</span> <span class="c"># Read the first slice in buffers</span> <span class="n">nrow</span> <span class="o">=</span> <span class="n">nb</span> <span class="o">*</span> <span class="n">nsb</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span><span class="n">tmp_sorted</span><span class="p">,</span> <span class="n">nrow</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span><span class="n">tmp_indices</span><span class="p">,</span> <span class="n">nrow</span><span class="p">,</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="c"># Loop over the remaining slices in block</span> <span class="n">lrb</span> <span class="o">=</span> <span class="n">nrow</span> <span class="o">+</span> <span class="n">nsb</span> <span class="k">if</span> <span class="n">lrb</span> <span class="o">></span> <span class="n">nslices</span><span class="p">:</span> <span class="n">lrb</span> <span class="o">=</span> <span class="n">nslices</span> <span class="n">nslice</span> <span class="o">=</span> <span class="n">nrow</span> <span class="c"># Just in case the loop below executes nothing</span> <span class="k">for</span> <span class="n">nslice</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nrow</span> <span class="o">+</span> <span class="mi">1</span><span 
class="p">,</span> <span class="n">lrb</span><span class="p">):</span> <span class="bp">self</span><span class="o">.</span><span class="n">reorder_slice</span><span class="p">(</span><span class="n">nslice</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">,</span> <span class="n">indices</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">,</span> <span class="n">sindices</span><span class="p">,</span> <span class="n">tmp_sorted</span><span class="p">,</span> <span class="n">tmp_indices</span><span class="p">)</span> <span class="c"># Write the first part of the buffers to the regular leaves</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_slice</span><span class="p">(</span><span class="nb">sorted</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_slice</span><span class="p">(</span><span class="n">indices</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">sindices</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="c"># Update caches for this slice</span> <span class="bp">self</span><span class="o">.</span><span class="n">update_caches</span><span class="p">(</span><span class="n">nslice</span><span class="p">,</span> <span class="n">ssorted</span><span class="p">[:</span><span class="n">ss</span><span class="p">])</span> <span class="k">def</span> <span class="nf">swap_slices</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s">"median"</span><span class="p">):</span> <span class="sd">"""Swap slices in a superblock."""</span> <span class="n">tmp</span> <span class="o">=</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">tmp</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted</span> <span class="n">indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices</span> <span class="n">tmp_sorted</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">sorted2</span> <span class="n">tmp_indices</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">indices2</span> <span class="n">ncs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nchunkslice</span> <span class="n">nss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nss2</span> <span class="o">=</span> <span class="n">nss</span> <span class="k">for</span> <span class="n">sblock</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nsuperblocks</span><span class="p">):</span> <span class="c"># Protection for last superblock having less slices than nss</span> <span class="n">remainingslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="o">-</span> <span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span> <span class="k">if</span> <span class="n">remainingslices</span> <span class="o"><</span> <span class="n">nss</span><span class="p">:</span> <span class="n">nss2</span> <span class="o">=</span> <span class="n">remainingslices</span> <span class="k">if</span> <span class="n">nss2</span> <span 
class="o"><=</span> <span class="mi">1</span><span class="p">:</span> <span class="k">break</span> <span class="k">if</span> <span class="n">mode</span> <span class="o">==</span> <span class="s">"start"</span><span class="p">:</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges</span><span class="p">[</span><span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span><span class="p">:</span><span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span> <span class="o">+</span> <span class="n">nss2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="k">elif</span> <span class="n">mode</span> <span class="o">==</span> <span class="s">"stop"</span><span class="p">:</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges</span><span class="p">[</span><span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span><span class="p">:</span><span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span> <span class="o">+</span> <span class="n">nss2</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="k">elif</span> <span class="n">mode</span> <span class="o">==</span> <span class="s">"median"</span><span class="p">:</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mranges</span><span class="p">[</span><span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span><span class="p">:</span><span class="n">sblock</span> <span class="o">*</span> <span class="n">nss</span> <span class="o">+</span> <span class="n">nss2</span><span class="p">]</span> <span class="n">sranges_idx</span> <span class="o">=</span> <span class="n">ranges</span><span class="o">.</span><span 
class="n">argsort</span><span class="p">(</span><span class="n">kind</span><span class="o">=</span><span class="n">defsort</span><span class="p">)</span> <span class="c"># Don't swap the superblock at all if one doesn't need to</span> <span class="n">ndiff</span> <span class="o">=</span> <span class="p">(</span><span class="n">sranges_idx</span> <span class="o">!=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">nss2</span><span class="p">))</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">/</span> <span class="mi">2</span> <span class="k">if</span> <span class="n">ndiff</span> <span class="o">*</span> <span class="mi">50</span> <span class="o"><</span> <span class="n">nss2</span><span class="p">:</span> <span class="c"># The number of slices to rearrange is less than 2.5%,</span> <span class="c"># so skip the reordering of this superblock</span> <span class="c"># (too expensive for such a little improvement)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span> <span class="k">print</span> <span class="s">"skipping reordering of superblock ->"</span><span class="p">,</span> <span class="n">sblock</span> <span class="k">continue</span> <span class="n">ns</span> <span class="o">=</span> <span class="n">sblock</span> <span class="o">*</span> <span class="n">nss2</span> <span class="c"># Swap sorted and indices slices following the new order</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nss2</span><span class="p">):</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">sranges_idx</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="c"># Swap sorted & indices slices</span> <span 
class="n">oi</span> <span class="o">=</span> <span class="n">ns</span> <span class="o">+</span> <span class="n">i</span> <span class="n">oidx</span> <span class="o">=</span> <span class="n">ns</span> <span class="o">+</span> <span class="n">idx</span> <span class="n">tmp_sorted</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">oidx</span><span class="p">]</span> <span class="n">tmp_indices</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">indices</span><span class="p">[</span><span class="n">oidx</span><span class="p">]</span> <span class="c"># Swap start, stop & median ranges</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges2</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges</span><span class="p">[</span><span class="n">oidx</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mranges2</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mranges</span><span class="p">[</span><span class="n">oidx</span><span class="p">]</span> <span class="c"># Swap chunk bounds</span> <span class="n">tmp</span><span class="o">.</span><span class="n">bounds2</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">bounds</span><span class="p">[</span><span class="n">oidx</span><span class="p">]</span> <span class="c"># Swap start, stop & median bounds</span> <span class="n">j</span> <span class="o">=</span> <span class="n">oi</span> <span class="o">*</span> <span 
class="n">ncs</span> <span class="n">jn</span> <span class="o">=</span> <span class="p">(</span><span class="n">oi</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">ncs</span> <span class="n">xj</span> <span class="o">=</span> <span class="n">oidx</span> <span class="o">*</span> <span class="n">ncs</span> <span class="n">xjn</span> <span class="o">=</span> <span class="p">(</span><span class="n">oidx</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">ncs</span> <span class="n">tmp</span><span class="o">.</span><span class="n">abounds2</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">abounds</span><span class="p">[</span><span class="n">xj</span><span class="p">:</span><span class="n">xjn</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">zbounds2</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">zbounds</span><span class="p">[</span><span class="n">xj</span><span class="p">:</span><span class="n">xjn</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mbounds2</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mbounds</span><span class="p">[</span><span class="n">xj</span><span class="p">:</span><span class="n">xjn</span><span class="p">]</span> <span class="c"># tmp -> originals</span> <span class="k">for</span> <span 
class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nss2</span><span class="p">):</span> <span class="c"># Copy sorted & indices slices</span> <span class="n">oi</span> <span class="o">=</span> <span class="n">ns</span> <span class="o">+</span> <span class="n">i</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp_sorted</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="n">indices</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp_indices</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="c"># Copy start, stop & median ranges</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">ranges2</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mranges</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mranges2</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="c"># Copy chunk bounds</span> <span class="n">tmp</span><span class="o">.</span><span class="n">bounds</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">bounds2</span><span class="p">[</span><span class="n">oi</span><span class="p">]</span> <span class="c"># Copy start, stop & median bounds</span> <span class="n">j</span> <span 
class="o">=</span> <span class="n">oi</span> <span class="o">*</span> <span class="n">ncs</span> <span class="n">jn</span> <span class="o">=</span> <span class="p">(</span><span class="n">oi</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">ncs</span> <span class="n">tmp</span><span class="o">.</span><span class="n">abounds</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">abounds2</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">zbounds</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">zbounds2</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mbounds</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp</span><span class="o">.</span><span class="n">mbounds2</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">jn</span><span class="p">]</span> <span class="k">def</span> <span class="nf">search_item_lt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">item</span><span class="p">,</span> <span class="n">nslice</span><span class="p">,</span> <span class="n">limits</span><span class="p">,</span> <span 
class="n">start</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> <span class="sd">"""Search a single item in a specific sorted slice."""</span> <span class="c"># This method only works under the assumption that item</span> <span class="c"># *is to be found* in the nslice.</span> <span class="k">assert</span> <span class="n">limits</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o"><</span> <span class="n">item</span> <span class="o"><=</span> <span class="n">limits</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">cs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nelementsLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="n">bstart</span> <span class="o">=</span> <span class="n">start</span> <span class="o">//</span> <span class="n">cs</span> <span class="c"># Find the chunk</span> <span class="k">if</span> <span class="n">nslice</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">nchunk</span> <span class="o">=</span> <span class="n">bisect_left</span><span class="p">(</span><span class="n">where</span><span class="o">.</span><span class="n">bounds</span><span class="p">[</span><span class="n">nslice</span><span class="p">],</span> <span class="n">item</span><span class="p">,</span> <span class="n">bstart</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="c"># We need to subtract 1 chunk here because bebounds</span> <span class="c"># has a leading value</span> <span class="n">nchunk</span> 
<span class="o">=</span> <span class="n">bisect_left</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">,</span> <span class="n">item</span><span class="p">,</span> <span class="n">bstart</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span> <span class="k">assert</span> <span class="n">nchunk</span> <span class="o">>=</span> <span class="mi">0</span> <span class="c"># Find the element in chunk</span> <span class="n">pos</span> <span class="o">=</span> <span class="n">nchunk</span> <span class="o">*</span> <span class="n">cs</span> <span class="k">if</span> <span class="n">nslice</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">pos</span> <span class="o">+=</span> <span class="n">bisect_left</span><span class="p">(</span><span class="n">where</span><span class="o">.</span><span class="n">sorted</span><span class="p">[</span><span class="n">nslice</span><span class="p">,</span> <span class="n">pos</span><span class="p">:</span><span class="n">pos</span> <span class="o">+</span> <span class="n">cs</span><span class="p">],</span> <span class="n">item</span><span class="p">)</span> <span class="k">assert</span> <span class="n">pos</span> <span class="o"><=</span> <span class="n">ss</span> <span class="k">else</span><span class="p">:</span> <span class="n">end</span> <span class="o">=</span> <span class="n">pos</span> <span class="o">+</span> <span class="n">cs</span> <span class="k">if</span> <span class="n">end</span> <span class="o">></span> <span class="n">nelementsLR</span><span class="p">:</span> <span class="n">end</span> <span class="o">=</span> <span class="n">nelementsLR</span> <span class="n">pos</span> <span class="o">+=</span> <span class="n">bisect_left</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span 
class="n">sortedLR</span><span class="p">[</span><span class="n">pos</span><span class="p">:</span><span class="n">end</span><span class="p">],</span> <span class="n">item</span><span class="p">)</span> <span class="k">assert</span> <span class="n">pos</span> <span class="o"><=</span> <span class="n">nelementsLR</span> <span class="k">assert</span> <span class="n">pos</span> <span class="o">></span> <span class="mi">0</span> <span class="k">return</span> <span class="n">pos</span> <span class="k">def</span> <span class="nf">compute_overlaps_finegrain</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="n">verbose</span><span class="p">):</span> <span class="sd">"""Compute some statistics about overlapping of slices in the index.</span> <span class="sd"> It returns the following info:</span> <span class="sd"> noverlaps : int</span> <span class="sd"> The total number of elements that overlap in the index.</span> <span class="sd"> multiplicity : array of int</span> <span class="sd"> The number of times that a concrete slice overlaps with any other.</span> <span class="sd"> toverlap : float</span> <span class="sd"> An overlap index: the sum of the values in segment slices that</span> <span class="sd"> overlap divided by the entire range of values. 
This index is only</span> <span class="sd"> computed for numerical types.</span> <span class="sd"> """</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">ranges</span><span class="p">[:]</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">sorted</span> <span class="n">sortedLR</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">nslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="n">nelementsLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="k">if</span> <span class="n">nelementsLR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="c"># Add the ranges corresponding to the last row</span> <span class="n">rangeslr</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]])</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">ranges</span><span class="p">,</span> <span class="p">[</span><span class="n">rangeslr</span><span class="p">]))</span> <span 
class="n">nslices</span> <span class="o">+=</span> <span class="mi">1</span> <span class="n">soverlap</span> <span class="o">=</span> <span class="mf">0.</span> <span class="n">toverlap</span> <span class="o">=</span> <span class="o">-</span><span class="mf">1.</span> <span class="n">multiplicity</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">nslices</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s">"int_"</span><span class="p">)</span> <span class="n">overlaps</span> <span class="o">=</span> <span class="n">multiplicity</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="n">starts</span> <span class="o">=</span> <span class="n">multiplicity</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nslices</span><span class="p">):</span> <span class="n">prev_end</span> <span class="o">=</span> <span class="n">ranges</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">nslices</span><span class="p">):</span> <span class="n">stj</span> <span class="o">=</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="k">assert</span> <span class="n">stj</span> <span class="o"><=</span> <span class="n">ss</span> <span class="k">if</span> <span class="n">stj</span> <span 
class="o">==</span> <span class="n">ss</span><span class="p">:</span> <span class="c"># This slice has already been counted</span> <span class="k">continue</span> <span class="k">if</span> <span class="n">j</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">next_beg</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="n">stj</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> <span class="n">next_beg</span> <span class="o">=</span> <span class="n">sortedLR</span><span class="p">[</span><span class="n">stj</span><span class="p">]</span> <span class="n">next_end</span> <span class="o">=</span> <span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="n">prev_end</span> <span class="o">></span> <span class="n">next_end</span><span class="p">:</span> <span class="c"># Complete overlapping case</span> <span class="n">multiplicity</span><span class="p">[</span><span class="n">j</span> <span class="o">-</span> <span class="n">i</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">j</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="n">overlaps</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">+=</span> <span class="n">ss</span> <span class="o">-</span> <span class="n">stj</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">ss</span> <span class="c"># a sentinel</span> <span class="k">else</span><span 
class="p">:</span> <span class="n">overlaps</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">+=</span> <span class="n">nelementsLR</span> <span class="o">-</span> <span class="n">stj</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">nelementsLR</span> <span class="c"># a sentinel</span> <span class="k">elif</span> <span class="n">prev_end</span> <span class="o">></span> <span class="n">next_beg</span><span class="p">:</span> <span class="n">multiplicity</span><span class="p">[</span><span class="n">j</span> <span class="o">-</span> <span class="n">i</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span> <span class="n">idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_item_lt</span><span class="p">(</span> <span class="n">where</span><span class="p">,</span> <span class="n">prev_end</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">],</span> <span class="n">stj</span><span class="p">)</span> <span class="n">nelem</span> <span class="o">=</span> <span class="n">idx</span> <span class="o">-</span> <span class="n">stj</span> <span class="n">overlaps</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">+=</span> <span class="n">nelem</span> <span class="n">starts</span><span class="p">[</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">idx</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">!=</span> <span class="s">"string"</span><span class="p">:</span> <span class="c"># Convert ranges into floats in order to allow</span> <span class="c"># doing 
operations with them without overflows</span> <span class="n">soverlap</span> <span class="o">+=</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span> <span class="c"># Return the overlap as the ratio between overlaps and entire range</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">!=</span> <span class="s">"string"</span><span class="p">:</span> <span class="n">erange</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span> <span class="c"># Check that there is an effective range of values</span> <span class="c"># Beware, erange can be negative in situations where</span> <span class="c"># the values are suffering overflow. This can happen</span> <span class="c"># specially on big signed integer values (on overflows,</span> <span class="c"># the end value will become negative!).</span> <span class="c"># Also, there is no way to compute overlap ratios for</span> <span class="c"># non-numerical types. So, be careful and always check</span> <span class="c"># that toverlap has a positive value (it must have been</span> <span class="c"># initialized to -1. 
before) before using it.</span> <span class="c"># F. Alted 2007-01-19</span> <span class="k">if</span> <span class="n">erange</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">toverlap</span> <span class="o">=</span> <span class="n">soverlap</span> <span class="o">/</span> <span class="n">erange</span> <span class="k">if</span> <span class="n">verbose</span> <span class="ow">and</span> <span class="n">message</span> <span class="o">!=</span> <span class="s">"init"</span><span class="p">:</span> <span class="k">print</span> <span class="s">"toverlap (</span><span class="si">%s</span><span class="s">):"</span> <span class="o">%</span> <span class="n">message</span><span class="p">,</span> <span class="n">toverlap</span> <span class="k">print</span> <span class="s">"multiplicity:</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span> <span class="n">multiplicity</span><span class="p">,</span> <span class="n">multiplicity</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="k">print</span> <span class="s">"overlaps:</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span> <span class="n">overlaps</span><span class="p">,</span> <span class="n">overlaps</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="n">noverlaps</span> <span class="o">=</span> <span class="n">overlaps</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="c"># For full indexes, set the 'is_csi' flag</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_file</span><span class="o">.</span><span class="n">_iswritable</span><span class="p">():</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">is_csi</span> <span class="o">=</span> <span class="p">(</span><span class="n">noverlaps</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span> <span class="c"># Save the number of overlaps for future references</span> <span class="bp">self</span><span class="o">.</span><span class="n">noverlaps</span> <span class="o">=</span> <span class="n">noverlaps</span> <span class="k">return</span> <span class="p">(</span><span class="n">noverlaps</span><span class="p">,</span> <span class="n">multiplicity</span><span class="p">,</span> <span class="n">toverlap</span><span class="p">)</span> <span class="k">def</span> <span class="nf">compute_overlaps</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">,</span> <span class="n">message</span><span class="p">,</span> <span class="n">verbose</span><span class="p">):</span> <span class="sd">"""Compute some statistics about overlapping of slices in index.</span> <span class="sd"> It returns the following info:</span> <span class="sd"> noverlaps : int</span> <span class="sd"> The total number of slices that overlap in index.</span> <span class="sd"> multiplicity : array of int</span> <span class="sd"> The number of times that a concrete slice overlaps with any other.</span> <span class="sd"> toverlap : float</span> <span class="sd"> An overlap index: the sum of the values in segment slices that</span> <span class="sd"> overlap divided by the entire range of values. 
This index is only</span> <span class="sd"> computed for numerical types.</span> <span class="sd"> """</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">where</span><span class="o">.</span><span class="n">ranges</span><span class="p">[:]</span> <span class="n">nslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsILR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="c"># Add the ranges corresponding to the last row</span> <span class="n">rangeslr</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]])</span> <span class="n">ranges</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="n">ranges</span><span class="p">,</span> <span class="p">[</span><span class="n">rangeslr</span><span class="p">]))</span> <span class="n">nslices</span> <span class="o">+=</span> <span class="mi">1</span> <span class="n">noverlaps</span> <span class="o">=</span> <span class="mi">0</span> <span class="n">soverlap</span> <span class="o">=</span> <span class="mf">0.</span> <span class="n">toverlap</span> <span class="o">=</span> <span class="o">-</span><span class="mf">1.</span> <span class="n">multiplicity</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">zeros</span><span 
class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">nslices</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s">"int_"</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nslices</span><span class="p">):</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">nslices</span><span class="p">):</span> <span class="k">if</span> <span class="n">ranges</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">></span> <span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="mi">0</span><span class="p">]:</span> <span class="n">noverlaps</span> <span class="o">+=</span> <span class="mi">1</span> <span class="n">multiplicity</span><span class="p">[</span><span class="n">j</span> <span class="o">-</span> <span class="n">i</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">!=</span> <span class="s">"string"</span><span class="p">:</span> <span class="c"># Convert ranges into floats in order to allow</span> <span class="c"># doing operations with them without overflows</span> <span class="n">soverlap</span> <span class="o">+=</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span 
class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="n">j</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span> <span class="c"># Return the overlap as the ratio between overlaps and entire range</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">!=</span> <span class="s">"string"</span><span class="p">:</span> <span class="n">erange</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span> <span class="o">-</span> <span class="nb">float</span><span class="p">(</span><span class="n">ranges</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span> <span class="c"># Check that there is an effective range of values</span> <span class="c"># Beware, erange can be negative in situations where</span> <span class="c"># the values are suffering overflow. This can happen</span> <span class="c"># specially on big signed integer values (on overflows,</span> <span class="c"># the end value will become negative!).</span> <span class="c"># Also, there is no way to compute overlap ratios for</span> <span class="c"># non-numerical types. So, be careful and always check</span> <span class="c"># that toverlap has a positive value (it must have been</span> <span class="c"># initialized to -1. before) before using it.</span> <span class="c"># F. 
Altet 2007-01-19</span> <span class="k">if</span> <span class="n">erange</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">toverlap</span> <span class="o">=</span> <span class="n">soverlap</span> <span class="o">/</span> <span class="n">erange</span> <span class="k">if</span> <span class="n">verbose</span><span class="p">:</span> <span class="k">print</span> <span class="s">"overlaps (</span><span class="si">%s</span><span class="s">):"</span> <span class="o">%</span> <span class="n">message</span><span class="p">,</span> <span class="n">noverlaps</span><span class="p">,</span> <span class="n">toverlap</span> <span class="k">print</span> <span class="n">multiplicity</span> <span class="c"># For full indexes, set the 'is_csi' flag</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_file</span><span class="o">.</span><span class="n">_iswritable</span><span class="p">():</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">is_csi</span> <span class="o">=</span> <span class="p">(</span><span class="n">noverlaps</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span> <span class="c"># Save the number of overlaps for future references</span> <span class="bp">self</span><span class="o">.</span><span class="n">noverlaps</span> <span class="o">=</span> <span class="n">noverlaps</span> <span class="k">return</span> <span class="p">(</span><span class="n">noverlaps</span><span class="p">,</span> <span class="n">multiplicity</span><span class="p">,</span> <span class="n">toverlap</span><span class="p">)</span> <span class="k">def</span> <span class="nf">read_sorted_indices</span><span class="p">(</span><span 
class="bp">self</span><span class="p">,</span> <span class="n">what</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">):</span> <span class="sd">"""Return the sorted or indices values in the specified range."""</span> <span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_process_range</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">)</span> <span class="k">if</span> <span class="n">start</span> <span class="o">>=</span> <span class="n">stop</span><span class="p">:</span> <span class="k">return</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="c"># Correction for negative values of step (reverse indices)</span> <span class="k">if</span> <span class="n">step</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span> <span class="n">tmp</span> <span class="o">=</span> <span class="n">start</span> <span class="n">start</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">-</span> <span class="n">stop</span> <span class="n">stop</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="o">-</span> <span class="n">tmp</span> <span class="k">if</span> <span class="n">what</span> <span class="o">==</span> 
<span class="s">"sorted"</span><span class="p">:</span> <span class="n">values</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span> <span class="n">valuesLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">buffer_</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">stop</span> <span class="o">-</span> <span class="n">start</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">values</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indices</span> <span class="n">valuesLR</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span> <span class="n">buffer_</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">stop</span> <span class="o">-</span> <span class="n">start</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s">"u</span><span class="si">%d</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span><span class="p">)</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nrow_start</span> <span class="o">=</span> <span class="n">start</span> <span class="o">//</span> <span class="n">ss</span> <span class="n">istart</span> <span class="o">=</span> <span class="n">start</span> <span class="o">%</span> 
<span class="n">ss</span> <span class="n">nrow_stop</span> <span class="o">=</span> <span class="n">stop</span> <span class="o">//</span> <span class="n">ss</span> <span class="n">tlen</span> <span class="o">=</span> <span class="n">stop</span> <span class="o">-</span> <span class="n">start</span> <span class="n">bstart</span> <span class="o">=</span> <span class="mi">0</span> <span class="n">ilen</span> <span class="o">=</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">nrow</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="n">nrow_start</span><span class="p">,</span> <span class="n">nrow_stop</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span> <span class="n">blen</span> <span class="o">=</span> <span class="n">ss</span> <span class="o">-</span> <span class="n">istart</span> <span class="k">if</span> <span class="n">ilen</span> <span class="o">+</span> <span class="n">blen</span> <span class="o">></span> <span class="n">tlen</span><span class="p">:</span> <span class="n">blen</span> <span class="o">=</span> <span class="n">tlen</span> <span class="o">-</span> <span class="n">ilen</span> <span class="k">if</span> <span class="n">blen</span> <span class="o"><=</span> <span class="mi">0</span><span class="p">:</span> <span class="k">break</span> <span class="k">if</span> <span class="n">nrow</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice</span><span class="p">(</span> <span class="n">values</span><span class="p">,</span> <span class="n">nrow</span><span class="p">,</span> <span class="n">buffer_</span><span class="p">[</span><span class="n">bstart</span><span class="p">:</span><span class="n">bstart</span> <span class="o">+</span> <span class="n">blen</span><span class="p">],</span> <span 
class="n">istart</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_slice_lr</span><span class="p">(</span> <span class="n">valuesLR</span><span class="p">,</span> <span class="n">buffer_</span><span class="p">[</span><span class="n">bstart</span><span class="p">:</span><span class="n">bstart</span> <span class="o">+</span> <span class="n">blen</span><span class="p">],</span> <span class="n">istart</span><span class="p">)</span> <span class="n">istart</span> <span class="o">=</span> <span class="mi">0</span> <span class="n">bstart</span> <span class="o">+=</span> <span class="n">blen</span> <span class="n">ilen</span> <span class="o">+=</span> <span class="n">blen</span> <span class="k">return</span> <span class="n">buffer_</span><span class="p">[::</span><span class="n">step</span><span class="p">]</span> <div class="viewcode-block" id="Index.read_sorted"><a class="viewcode-back" href="../../usersguide/libref/helper_classes.html#tables.index.Index.read_sorted">[docs]</a> <span class="k">def</span> <span class="nf">read_sorted</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">stop</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">step</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span> <span class="sd">"""Return the sorted values of index in the specified range.</span> <span class="sd"> The meaning of the start, stop and step arguments is the same as in</span> <span class="sd"> :meth:`Table.read_sorted`.</span> <span class="sd"> """</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_sorted_indices</span><span class="p">(</span><span class="s">'sorted'</span><span 
class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">)</span> </div> <span class="n">readSorted</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">read_sorted</span><span class="p">)</span> <div class="viewcode-block" id="Index.read_indices"><a class="viewcode-back" href="../../usersguide/libref/helper_classes.html#tables.index.Index.read_indices">[docs]</a> <span class="k">def</span> <span class="nf">read_indices</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">stop</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">step</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span> <span class="sd">"""Return the indices values of index in the specified range.</span> <span class="sd"> The meaning of the start, stop and step arguments is the same as in</span> <span class="sd"> :meth:`Table.read_sorted`.</span> <span class="sd"> """</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_sorted_indices</span><span class="p">(</span><span class="s">'indices'</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">)</span> </div> <span class="n">readIndices</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">read_indices</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_process_range</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span 
class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">):</span> <span class="sd">"""Get a range specific for the index usage."""</span> <span class="k">if</span> <span class="n">start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span> <span class="ow">and</span> <span class="n">stop</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span> <span class="c"># Special case for the behaviour of PyTables iterators</span> <span class="n">stop</span> <span class="o">=</span> <span class="n">idx2long</span><span class="p">(</span><span class="n">start</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="n">start</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span> <span class="n">start</span> <span class="o">=</span> <span class="il">0L</span> <span class="k">else</span><span class="p">:</span> <span class="n">start</span> <span class="o">=</span> <span class="n">idx2long</span><span class="p">(</span><span class="n">start</span><span class="p">)</span> <span class="k">if</span> <span class="n">stop</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span> <span class="n">stop</span> <span class="o">=</span> <span class="n">idx2long</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nelements</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">stop</span> <span class="o">=</span> <span class="n">idx2long</span><span class="p">(</span><span class="n">stop</span><span class="p">)</span> <span class="k">if</span> <span class="n">step</span> <span class="ow">is</span> <span class="bp">None</span><span class="p">:</span> <span class="n">step</span> <span class="o">=</span> <span class="il">1L</span> <span class="k">else</span><span class="p">:</span> <span 
class="n">step</span> <span class="o">=</span> <span class="n">idx2long</span><span class="p">(</span><span class="n">step</span><span class="p">)</span> <span class="k">return</span> <span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">step</span><span class="p">)</span> <span class="n">_processRange</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">_process_range</span><span class="p">)</span> <div class="viewcode-block" id="Index.__getitem__"><a class="viewcode-back" href="../../usersguide/libref/helper_classes.html#tables.index.Index.__getitem__">[docs]</a> <span class="k">def</span> <span class="nf">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">):</span> <span class="sd">"""Return the indices values of index in the specified range.</span> <span class="sd"> If key argument is an integer, the corresponding index is returned. If</span> <span class="sd"> key is a slice, the range of indices determined by it is returned. 
A</span> <span class="sd"> negative value of step in slice is supported, meaning that the results</span> <span class="sd"> will be returned in reverse order.</span> <span class="sd"> This method is equivalent to :meth:`Index.read_indices`.</span> <span class="sd"> """</span> <span class="k">if</span> <span class="n">is_idx</span><span class="p">(</span><span class="n">key</span><span class="p">):</span> <span class="k">if</span> <span class="n">key</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span> <span class="c"># To support negative values</span> <span class="n">key</span> <span class="o">+=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_indices</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">key</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="nb">slice</span><span class="p">):</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">read_indices</span><span class="p">(</span><span class="n">key</span><span class="o">.</span><span class="n">start</span><span class="p">,</span> <span class="n">key</span><span class="o">.</span><span class="n">stop</span><span class="p">,</span> <span class="n">key</span><span class="o">.</span><span class="n">step</span><span class="p">)</span> </div> <span class="k">def</span> <span class="nf">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">nelements</span> <span class="k">def</span> <span class="nf">restorecache</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="s">"Clean the limits cache and resize starts and lengths arrays"</span> <span class="n">params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_file</span><span class="o">.</span><span class="n">params</span> <span class="c"># The sorted IndexArray is absolutely required to be in memory</span> <span class="c"># at the same time as the Index instance, so create a strong</span> <span class="c"># reference to it. We are not introducing leaks because the</span> <span class="c"># strong reference will disappear when this Index instance is</span> <span class="c"># to be closed.</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sorted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sorted</span><span class="o">.</span><span class="n">boundscache</span> <span class="o">=</span> <span class="n">ObjectCache</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s">'BOUNDS_MAX_SLOTS'</span><span class="p">],</span> <span class="n">params</span><span class="p">[</span><span class="s">'BOUNDS_MAX_SIZE'</span><span class="p">],</span> <span class="s">'non-opt types bounds'</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span><span class="o">.</span><span class="n">boundscache</span> <span class="o">=</span> <span class="n">ObjectCache</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s">'BOUNDS_MAX_SLOTS'</span><span class="p">],</span> <span class="n">params</span><span class="p">[</span><span class="s">'BOUNDS_MAX_SIZE'</span><span 
class="p">],</span> <span class="s">'non-opt types bounds'</span><span class="p">)</span> <span class="sd">"""A cache for the bounds (2nd hash) data. Only used for</span> <span class="sd"> non-optimized types searches."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">limboundscache</span> <span class="o">=</span> <span class="n">ObjectCache</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s">'LIMBOUNDS_MAX_SLOTS'</span><span class="p">],</span> <span class="n">params</span><span class="p">[</span><span class="s">'LIMBOUNDS_MAX_SIZE'</span><span class="p">],</span> <span class="s">'bounding limits'</span><span class="p">)</span> <span class="sd">"""A cache for bounding limits."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLRcache</span> <span class="o">=</span> <span class="n">ObjectCache</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s">'SORTEDLR_MAX_SLOTS'</span><span class="p">],</span> <span class="n">params</span><span class="p">[</span><span class="s">'SORTEDLR_MAX_SIZE'</span><span class="p">],</span> <span class="s">'last row chunks'</span><span class="p">)</span> <span class="sd">"""A cache for the last row chunks. 
Only used for searches in</span> <span class="sd"> the last row, and mainly useful for small indexes."""</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nrows</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nrows</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">numpy</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span><span class="o">.</span><span class="n">_init_sorted_slice</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirtycache</span> <span class="o">=</span> <span class="bp">False</span> <span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span> <span class="sd">"""Do a binary search in this index for an item"""</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">tref</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span 
class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Entering search"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirtycache</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">restorecache</span><span class="p">()</span> <span class="c"># An empty item or if left limit is larger than the right one</span> <span class="c"># means that the number of records is always going to be empty,</span> <span class="c"># so we avoid further computation (including looking up the</span> <span class="c"># limits cache).</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">item</span> <span class="ow">or</span> <span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">></span> <span class="n">item</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span><span class="p">[:]</span> <span class="o">=</span> <span class="mi">0</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">[:]</span> <span class="o">=</span> <span class="mi">0</span> <span class="k">return</span> <span class="mi">0</span> <span class="n">tlen</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Check whether the item tuple is in the limits cache or not</span> <span class="n">nslot</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">limboundscache</span><span class="o">.</span><span class="n">getslot</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="k">if</span> <span class="n">nslot</span> 
<span class="o">>=</span> <span class="mi">0</span><span class="p">:</span> <span class="n">startlengths</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">limboundscache</span><span class="o">.</span><span class="n">getitem</span><span class="p">(</span><span class="n">nslot</span><span class="p">)</span> <span class="c"># Reset the lengths array (not necessary for starts)</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">[:]</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Now, set the interesting rows</span> <span class="k">for</span> <span class="n">nrow</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">startlengths</span><span class="p">)):</span> <span class="n">nrow2</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">length</span> <span class="o">=</span> <span class="n">startlengths</span><span class="p">[</span><span class="n">nrow</span><span class="p">]</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span><span class="p">[</span><span class="n">nrow2</span><span class="p">]</span> <span class="o">=</span> <span class="n">start</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">[</span><span class="n">nrow2</span><span class="p">]</span> <span class="o">=</span> <span class="n">length</span> <span class="n">tlen</span> <span class="o">=</span> <span class="n">tlen</span> <span class="o">+</span> <span class="n">length</span> <span class="k">return</span> <span class="n">tlen</span> <span class="c"># The item is not in cache. 
Do the real lookup.</span> <span class="nb">sorted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">opt_search_types</span><span class="p">:</span> <span class="c"># The next are optimizations. However, they hide the</span> <span class="c"># CPU functions consumptions from python profiles.</span> <span class="c"># You may want to de-activate them during profiling.</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"int32"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_i</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"int64"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_ll</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"float16"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span 
class="n">_search_bin_na_e</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"float32"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_f</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"float64"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_d</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"float96"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_g</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"float128"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_g</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span 
class="o">==</span> <span class="s">"uint32"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_ui</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"uint64"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_ull</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"int8"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_b</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"int16"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_s</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"uint8"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_ub</span><span class="p">(</span><span 
class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">type</span> <span class="o">==</span> <span class="s">"uint16"</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin_na_us</span><span class="p">(</span><span class="o">*</span><span class="n">item</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="k">assert</span> <span class="bp">False</span><span class="p">,</span> <span class="s">"This can't happen!"</span> <span class="k">else</span><span class="p">:</span> <span class="n">tlen</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_scalar</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">)</span> <span class="c"># Get possible remaining values in last row</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="c"># Look for more indexes in the last row</span> <span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">search_last_row</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">start</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">[</span><span 
class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">stop</span> <span class="o">-</span> <span class="n">start</span> <span class="n">tlen</span> <span class="o">+=</span> <span class="n">stop</span> <span class="o">-</span> <span class="n">start</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">limboundscache</span><span class="o">.</span><span class="n">couldenablecache</span><span class="p">():</span> <span class="c"># Get a startlengths tuple and save it in cache.</span> <span class="c"># This is quite slow, but it is a good way to compress</span> <span class="c"># the bounds info. Moreover, the .couldenablecache()</span> <span class="c"># is doing a good job so as to avoid computing this</span> <span class="c"># when it is not necessary to do it.</span> <span class="n">startlengths</span> <span class="o">=</span> <span class="p">[]</span> <span class="k">for</span> <span class="n">nrow</span><span class="p">,</span> <span class="n">length</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">):</span> <span class="k">if</span> <span class="n">length</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> <span class="n">startlengths</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">nrow</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span><span class="p">[</span><span class="n">nrow</span><span class="p">],</span> <span class="n">length</span><span class="p">))</span> <span class="c"># Compute the size of the recarray (approximately)</span> <span class="c"># The +1 at the end is important to avoid 0 lengths</span> <span class="c"># (remember, the object headers take some space)</span> 
<span class="n">size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">startlengths</span><span class="p">)</span> <span class="o">*</span> <span class="mi">8</span> <span class="o">*</span> <span class="mi">2</span> <span class="o">+</span> <span class="mi">1</span> <span class="c"># Put this startlengths list in cache</span> <span class="bp">self</span><span class="o">.</span><span class="n">limboundscache</span><span class="o">.</span><span class="n">setitem</span><span class="p">(</span><span class="n">item</span><span class="p">,</span> <span class="n">startlengths</span><span class="p">,</span> <span class="n">size</span><span class="p">)</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Exiting search"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">return</span> <span class="n">tlen</span> <span class="c"># This is a scalar version of search. 
It works with strings as well.</span> <span class="k">def</span> <span class="nf">search_scalar</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">,</span> <span class="nb">sorted</span><span class="p">):</span> <span class="sd">"""Do a binary search in this index for an item."""</span> <span class="n">tlen</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Do the lookup for values fulfilling the conditions</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nslices</span><span class="p">):</span> <span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">)</span> <span class="o">=</span> <span class="nb">sorted</span><span class="o">.</span><span class="n">_search_bin</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">starts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">start</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">stop</span> <span class="o">-</span> <span class="n">start</span> <span class="n">tlen</span> <span class="o">+=</span> <span class="n">stop</span> <span class="o">-</span> <span class="n">start</span> <span class="k">return</span> <span class="n">tlen</span> <span class="k">def</span> <span class="nf">search_last_row</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span> <span 
class="c"># Variable initialization</span> <span class="n">item1</span><span class="p">,</span> <span class="n">item2</span> <span class="o">=</span> <span class="n">item</span> <span class="n">bebounds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bebounds</span> <span class="n">b0</span><span class="p">,</span> <span class="n">b1</span> <span class="o">=</span> <span class="n">bebounds</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">bebounds</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="n">bounds</span> <span class="o">=</span> <span class="n">bebounds</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="n">itemsize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">itemsize</span> <span class="n">sortedLRcache</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLRcache</span> <span class="n">hi</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelementsSLR</span> <span class="c"># maximum number of elements</span> <span class="n">rchunksize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">chunksize</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="n">nchunk</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c"># Lookup for item1</span> <span class="k">if</span> <span class="n">item1</span> <span class="o">></span> <span class="n">b0</span><span class="p">:</span> <span class="k">if</span> <span 
class="n">item1</span> <span class="o">&lt;=</span> <span class="n">b1</span><span class="p">:</span> <span class="c"># Search the appropriate chunk in bounds cache</span> <span class="n">nchunk</span> <span class="o">=</span> <span class="n">bisect_left</span><span class="p">(</span><span class="n">bounds</span><span class="p">,</span> <span class="n">item1</span><span class="p">)</span> <span class="c"># Lookup for this chunk in cache</span> <span class="n">nslot</span> <span class="o">=</span> <span class="n">sortedLRcache</span><span class="o">.</span><span class="n">getslot</span><span class="p">(</span><span class="n">nchunk</span><span class="p">)</span> <span class="k">if</span> <span class="n">nslot</span> <span class="o">>=</span> <span class="mi">0</span><span class="p">:</span> <span class="n">chunk</span> <span class="o">=</span> <span class="n">sortedLRcache</span><span class="o">.</span><span class="n">getitem</span><span class="p">(</span><span class="n">nslot</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">begin</span> <span class="o">=</span> <span class="n">rchunksize</span> <span class="o">*</span> <span class="n">nchunk</span> <span class="n">end</span> <span class="o">=</span> <span class="n">rchunksize</span> <span class="o">*</span> <span class="p">(</span><span class="n">nchunk</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="n">end</span> <span class="o">></span> <span class="n">hi</span><span class="p">:</span> <span class="n">end</span> <span class="o">=</span> <span class="n">hi</span> <span class="c"># Read the chunk from disk</span> <span class="n">chunk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span><span class="o">.</span><span class="n">_read_sorted_slice</span><span class="p">(</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">sorted</span><span class="p">,</span> <span class="n">begin</span><span class="p">,</span> <span class="n">end</span><span class="p">)</span> <span class="c"># Put it in cache. It's important to *copy*</span> <span class="c"># the buffer, as it is reused in future reads!</span> <span class="n">sortedLRcache</span><span class="o">.</span><span class="n">setitem</span><span class="p">(</span><span class="n">nchunk</span><span class="p">,</span> <span class="n">chunk</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">begin</span><span class="p">)</span> <span class="o">*</span> <span class="n">itemsize</span><span class="p">)</span> <span class="n">start</span> <span class="o">=</span> <span class="n">bisect_left</span><span class="p">(</span><span class="n">chunk</span><span class="p">,</span> <span class="n">item1</span><span class="p">)</span> <span class="n">start</span> <span class="o">+=</span> <span class="n">rchunksize</span> <span class="o">*</span> <span class="n">nchunk</span> <span class="k">else</span><span class="p">:</span> <span class="n">start</span> <span class="o">=</span> <span class="n">hi</span> <span class="k">else</span><span class="p">:</span> <span class="n">start</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># Lookup for item2</span> <span class="k">if</span> <span class="n">item2</span> <span class="o">>=</span> <span class="n">b0</span><span class="p">:</span> <span class="k">if</span> <span class="n">item2</span> <span class="o">&lt;</span> <span class="n">b1</span><span class="p">:</span> <span class="c"># Search the appropriate chunk in bounds cache</span> <span class="n">nchunk2</span> <span class="o">=</span> <span class="n">bisect_right</span><span class="p">(</span><span class="n">bounds</span><span class="p">,</span> <span class="n">item2</span><span class="p">)</span> <span 
class="k">if</span> <span class="n">nchunk2</span> <span class="o">!=</span> <span class="n">nchunk</span><span class="p">:</span> <span class="c"># Lookup for this chunk in cache</span> <span class="n">nslot</span> <span class="o">=</span> <span class="n">sortedLRcache</span><span class="o">.</span><span class="n">getslot</span><span class="p">(</span><span class="n">nchunk2</span><span class="p">)</span> <span class="k">if</span> <span class="n">nslot</span> <span class="o">>=</span> <span class="mi">0</span><span class="p">:</span> <span class="n">chunk</span> <span class="o">=</span> <span class="n">sortedLRcache</span><span class="o">.</span><span class="n">getitem</span><span class="p">(</span><span class="n">nslot</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">begin</span> <span class="o">=</span> <span class="n">rchunksize</span> <span class="o">*</span> <span class="n">nchunk2</span> <span class="n">end</span> <span class="o">=</span> <span class="n">rchunksize</span> <span class="o">*</span> <span class="p">(</span><span class="n">nchunk2</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="n">end</span> <span class="o">></span> <span class="n">hi</span><span class="p">:</span> <span class="n">end</span> <span class="o">=</span> <span class="n">hi</span> <span class="c"># Read the chunk from disk</span> <span class="n">chunk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span><span class="o">.</span><span class="n">_read_sorted_slice</span><span class="p">(</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span><span class="p">,</span> <span class="n">begin</span><span class="p">,</span> <span class="n">end</span><span class="p">)</span> <span class="c"># Put it in cache. 
It's important to *copy*</span> <span class="c"># the buffer, as it is reused in future reads!</span> <span class="c"># See bug #60 in xot.carabos.com</span> <span class="n">sortedLRcache</span><span class="o">.</span><span class="n">setitem</span><span class="p">(</span><span class="n">nchunk2</span><span class="p">,</span> <span class="n">chunk</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">begin</span><span class="p">)</span> <span class="o">*</span> <span class="n">itemsize</span><span class="p">)</span> <span class="n">stop</span> <span class="o">=</span> <span class="n">bisect_right</span><span class="p">(</span><span class="n">chunk</span><span class="p">,</span> <span class="n">item2</span><span class="p">)</span> <span class="n">stop</span> <span class="o">+=</span> <span class="n">rchunksize</span> <span class="o">*</span> <span class="n">nchunk2</span> <span class="k">else</span><span class="p">:</span> <span class="n">stop</span> <span class="o">=</span> <span class="n">hi</span> <span class="k">else</span><span class="p">:</span> <span class="n">stop</span> <span class="o">=</span> <span class="mi">0</span> <span class="k">return</span> <span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">)</span> <span class="n">searchLastRow</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">search_last_row</span><span class="p">)</span> <span class="k">def</span> <span class="nf">get_chunkmap</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""Compute a map with the interesting chunks in index"""</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">tref</span> <span class="o">=</span> <span class="n">time</span><span 
class="p">()</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Entering get_chunkmap"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="n">ss</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span> <span class="n">nsb</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslicesblock</span> <span class="n">nslices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nslices</span> <span class="n">lbucket</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lbucket</span> <span class="n">indsize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indsize</span> <span class="n">bucketsinblock</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span><span class="p">)</span> <span class="o">/</span> <span class="n">lbucket</span> <span class="n">nchunks</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nelements</span><span class="p">)</span> <span class="o">/</span> <span class="n">lbucket</span><span class="p">))</span> <span class="n">chunkmap</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">nchunks</span><span class="p">,</span> <span 
class="n">dtype</span><span class="o">=</span><span class="s">"bool"</span><span class="p">)</span> <span class="n">reduction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduction</span> <span class="n">starts</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">starts</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">reduction</span> <span class="o">+</span> <span class="mi">1</span> <span class="n">stops</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">starts</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">lengths</span><span class="p">)</span> <span class="o">*</span> <span class="n">reduction</span> <span class="n">starts</span><span class="p">[</span><span class="n">starts</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span> <span class="c"># All negative values set to zero</span> <span class="n">indices</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">indices</span> <span class="k">for</span> <span class="n">nslice</span> <span class="ow">in</span> <span class="nb">xrange</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nrows</span><span class="p">):</span> <span class="n">start</span> <span class="o">=</span> <span class="n">starts</span><span class="p">[</span><span class="n">nslice</span><span class="p">]</span> <span class="n">stop</span> <span class="o">=</span> <span class="n">stops</span><span class="p">[</span><span class="n">nslice</span><span class="p">]</span> <span class="k">if</span> <span class="n">stop</span> <span class="o">></span> <span 
class="n">start</span><span class="p">:</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">stop</span> <span class="o">-</span> <span class="n">start</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s">'u</span><span class="si">%d</span><span class="s">'</span> <span class="o">%</span> <span class="n">indsize</span><span class="p">)</span> <span class="k">if</span> <span class="n">nslice</span> <span class="o">&lt;</span> <span class="n">nslices</span><span class="p">:</span> <span class="n">indices</span><span class="o">.</span><span class="n">_read_index_slice</span><span class="p">(</span><span class="n">nslice</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">idx</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span><span class="o">.</span><span class="n">_read_index_slice</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">stop</span><span class="p">,</span> <span class="n">idx</span><span class="p">)</span> <span class="k">if</span> <span class="n">indsize</span> <span class="o">==</span> <span class="mi">8</span><span class="p">:</span> <span class="n">idx</span> <span class="o">//=</span> <span class="n">lbucket</span> <span class="k">elif</span> <span class="n">indsize</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> <span class="c"># The chunkmap size can never be larger than 'int_'</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">idx</span><span class="o">.</span><span class="n">astype</span><span 
class="p">(</span><span class="s">"int_"</span><span class="p">)</span> <span class="n">offset</span> <span class="o">=</span> <span class="nb">long</span><span class="p">((</span><span class="n">nslice</span> <span class="o">//</span> <span class="n">nsb</span><span class="p">)</span> <span class="o">*</span> <span class="n">bucketsinblock</span><span class="p">)</span> <span class="n">idx</span> <span class="o">+=</span> <span class="n">offset</span> <span class="k">elif</span> <span class="n">indsize</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> <span class="c"># The chunkmap size can never be larger than 'int_'</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">idx</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s">"int_"</span><span class="p">)</span> <span class="n">offset</span> <span class="o">=</span> <span class="p">(</span><span class="n">nslice</span> <span class="o">*</span> <span class="n">ss</span><span class="p">)</span> <span class="o">//</span> <span class="n">lbucket</span> <span class="n">idx</span> <span class="o">+=</span> <span class="n">offset</span> <span class="n">chunkmap</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span> <span class="o">=</span> <span class="bp">True</span> <span class="c"># The case lbucket &lt; nrowsinchunk should only happen in tests</span> <span class="n">nrowsinchunk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nrowsinchunk</span> <span class="k">if</span> <span class="n">lbucket</span> <span class="o">!=</span> <span class="n">nrowsinchunk</span><span class="p">:</span> <span class="c"># Map the 'coarse grain' chunkmap into the 'true' chunkmap</span> <span class="n">nelements</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span> <span class="n">tnchunks</span> 
<span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">nelements</span><span class="p">)</span> <span class="o">/</span> <span class="n">nrowsinchunk</span><span class="p">))</span> <span class="n">tchunkmap</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">tnchunks</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s">"bool"</span><span class="p">)</span> <span class="n">ratio</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">lbucket</span><span class="p">)</span> <span class="o">/</span> <span class="n">nrowsinchunk</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">chunkmap</span><span class="o">.</span><span class="n">nonzero</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span> <span class="n">starts</span> <span class="o">=</span> <span class="p">(</span><span class="n">idx</span> <span class="o">*</span> <span class="n">ratio</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s">'int_'</span><span class="p">)</span> <span class="n">stops</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">ceil</span><span class="p">((</span><span class="n">idx</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">ratio</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s">'int_'</span><span class="p">)</span> <span class="k">for</span> 
<span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">idx</span><span class="p">)):</span> <span class="n">tchunkmap</span><span class="p">[</span><span class="n">starts</span><span class="p">[</span><span class="n">i</span><span class="p">]:</span><span class="n">stops</span><span class="p">[</span><span class="n">i</span><span class="p">]]</span> <span class="o">=</span> <span class="bp">True</span> <span class="n">chunkmap</span> <span class="o">=</span> <span class="n">tchunkmap</span> <span class="k">if</span> <span class="n">profile</span><span class="p">:</span> <span class="n">show_stats</span><span class="p">(</span><span class="s">"Exiting get_chunkmap"</span><span class="p">,</span> <span class="n">tref</span><span class="p">)</span> <span class="k">return</span> <span class="n">chunkmap</span> <span class="k">def</span> <span class="nf">get_lookup_range</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ops</span><span class="p">,</span> <span class="n">limits</span><span class="p">):</span> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">ops</span><span class="p">)</span> <span class="ow">in</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">limits</span><span class="p">)</span> <span class="ow">in</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">ops</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">limits</span><span 
class="p">)</span> <span class="n">column</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">column</span> <span class="n">coldtype</span> <span class="o">=</span> <span class="n">column</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">base</span> <span class="n">itemsize</span> <span class="o">=</span> <span class="n">coldtype</span><span class="o">.</span><span class="n">itemsize</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">limits</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span> <span class="k">assert</span> <span class="n">ops</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">in</span> <span class="p">[</span><span class="s">'lt'</span><span class="p">,</span> <span class="s">'le'</span><span class="p">,</span> <span class="s">'eq'</span><span class="p">,</span> <span class="s">'ge'</span><span class="p">,</span> <span class="s">'gt'</span><span class="p">]</span> <span class="n">limit</span> <span class="o">=</span> <span class="n">limits</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="n">op</span> <span class="o">=</span> <span class="n">ops</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="n">op</span> <span class="o">==</span> <span class="s">'lt'</span><span class="p">:</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">inftype</span><span class="p">(</span><span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">,</span> <span class="n">sign</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span> <span class="n">nextafter</span><span class="p">(</span><span 
class="n">limit</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">))</span> <span class="k">elif</span> <span class="n">op</span> <span class="o">==</span> <span class="s">'le'</span><span class="p">:</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">inftype</span><span class="p">(</span><span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">,</span> <span class="n">sign</span><span class="o">=-</span><span class="mi">1</span><span class="p">),</span> <span class="n">limit</span><span class="p">)</span> <span class="k">elif</span> <span class="n">op</span> <span class="o">==</span> <span class="s">'gt'</span><span class="p">:</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">nextafter</span><span class="p">(</span><span class="n">limit</span><span class="p">,</span> <span class="o">+</span><span class="mi">1</span><span class="p">,</span> <span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">),</span> <span class="n">inftype</span><span class="p">(</span><span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">,</span> <span class="n">sign</span><span class="o">=+</span><span class="mi">1</span><span class="p">))</span> <span class="k">elif</span> <span class="n">op</span> <span class="o">==</span> <span class="s">'ge'</span><span class="p">:</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">limit</span><span class="p">,</span> <span class="n">inftype</span><span class="p">(</span><span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">,</span> <span 
class="n">sign</span><span class="o">=+</span><span class="mi">1</span><span class="p">))</span> <span class="k">elif</span> <span class="n">op</span> <span class="o">==</span> <span class="s">'eq'</span><span class="p">:</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">limit</span><span class="p">,</span> <span class="n">limit</span><span class="p">)</span> <span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">limits</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> <span class="k">assert</span> <span class="n">ops</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">in</span> <span class="p">(</span><span class="s">'gt'</span><span class="p">,</span> <span class="s">'ge'</span><span class="p">)</span> <span class="ow">and</span> <span class="n">ops</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">in</span> <span class="p">(</span><span class="s">'lt'</span><span class="p">,</span> <span class="s">'le'</span><span class="p">)</span> <span class="n">lower</span><span class="p">,</span> <span class="n">upper</span> <span class="o">=</span> <span class="n">limits</span> <span class="k">if</span> <span class="n">lower</span> <span class="o">></span> <span class="n">upper</span><span class="p">:</span> <span class="c"># ``a &lt;[=] x &lt;[=] b`` is always false if ``a > b``.</span> <span class="k">return</span> <span class="p">()</span> <span class="k">if</span> <span class="n">ops</span> <span class="o">==</span> <span class="p">(</span><span class="s">'gt'</span><span class="p">,</span> <span class="s">'lt'</span><span class="p">):</span> <span class="c"># lower &lt; col &lt; upper</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">nextafter</span><span class="p">(</span><span 
class="n">lower</span><span class="p">,</span> <span class="o">+</span><span class="mi">1</span><span class="p">,</span> <span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">),</span> <span class="n">nextafter</span><span class="p">(</span><span class="n">upper</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">))</span> <span class="k">elif</span> <span class="n">ops</span> <span class="o">==</span> <span class="p">(</span><span class="s">'ge'</span><span class="p">,</span> <span class="s">'lt'</span><span class="p">):</span> <span class="c"># lower &lt;= col &lt; upper</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">lower</span><span class="p">,</span> <span class="n">nextafter</span><span class="p">(</span><span class="n">upper</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">))</span> <span class="k">elif</span> <span class="n">ops</span> <span class="o">==</span> <span class="p">(</span><span class="s">'gt'</span><span class="p">,</span> <span class="s">'le'</span><span class="p">):</span> <span class="c"># lower &lt; col &lt;= upper</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">nextafter</span><span class="p">(</span><span class="n">lower</span><span class="p">,</span> <span class="o">+</span><span class="mi">1</span><span class="p">,</span> <span class="n">coldtype</span><span class="p">,</span> <span class="n">itemsize</span><span class="p">),</span> <span class="n">upper</span><span class="p">)</span> <span class="k">elif</span> <span class="n">ops</span> <span class="o">==</span> <span 
class="p">(</span><span class="s">'ge'</span><span class="p">,</span> <span class="s">'le'</span><span class="p">):</span> <span class="c"># lower &lt;= col &lt;= upper</span> <span class="n">range_</span> <span class="o">=</span> <span class="p">(</span><span class="n">lower</span><span class="p">,</span> <span class="n">upper</span><span class="p">)</span> <span class="k">return</span> <span class="n">range_</span> <span class="n">getLookupRange</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">get_lookup_range</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_f_remove</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">recursive</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span> <span class="sd">"""Remove this Index object"""</span> <span class="c"># Index removal is always recursive,</span> <span class="c"># no matter what `recursive` says.</span> <span class="nb">super</span><span class="p">(</span><span class="n">Index</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">_f_remove</span><span class="p">(</span><span class="bp">True</span><span class="p">)</span> <span class="k">def</span> <span class="nf">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""This provides a more compact representation than __repr__"""</span> <span class="c"># The filters</span> <span class="n">filters</span> <span class="o">=</span> <span class="s">""</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">filters</span><span class="o">.</span><span class="n">complevel</span><span class="p">:</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">filters</span><span class="o">.</span><span 
class="n">shuffle</span><span class="p">:</span> <span class="n">filters</span> <span class="o">+=</span> <span class="s">", shuffle"</span> <span class="n">filters</span> <span class="o">+=</span> <span class="s">", </span><span class="si">%s</span><span class="s">(</span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">filters</span><span class="o">.</span><span class="n">complib</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">filters</span><span class="o">.</span><span class="n">complevel</span><span class="p">)</span> <span class="k">return</span> <span class="s">"Index(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s%s</span><span class="s">).is_csi=</span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> \ <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">kind</span><span class="p">,</span> <span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_csi</span><span class="p">)</span> <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="sd">"""This provides more metainfo than standard __repr__"""</span> <span class="n">cpathname</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="o">.</span><span class="n">_v_pathname</span> <span class="o">+</span> <span class="s">".cols."</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">pathname</span> <span 
class="n">retstr</span> <span class="o">=</span> <span class="s">"""</span><span class="si">%s</span><span class="s"> (Index for column </span><span class="si">%s</span><span class="s">)</span> <span class="s"> optlevel := </span><span class="si">%s</span><span class="s"></span> <span class="s"> kind := </span><span class="si">%s</span><span class="s"></span> <span class="s"> filters := </span><span class="si">%s</span><span class="s"></span> <span class="s"> is_csi := </span><span class="si">%s</span><span class="s"></span> <span class="s"> nelements := </span><span class="si">%s</span><span class="s"></span> <span class="s"> chunksize := </span><span class="si">%s</span><span class="s"></span> <span class="s"> slicesize := </span><span class="si">%s</span><span class="s"></span> <span class="s"> blocksize := </span><span class="si">%s</span><span class="s"></span> <span class="s"> superblocksize := </span><span class="si">%s</span><span class="s"></span> <span class="s"> filters := </span><span class="si">%s</span><span class="s"></span> <span class="s"> dirty := </span><span class="si">%s</span><span class="s"></span> <span class="s"> byteorder := </span><span class="si">%r</span><span class="s">"""</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_v_pathname</span><span class="p">,</span> <span class="n">cpathname</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">optlevel</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">kind</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_csi</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">nelements</span><span class="p">,</span> <span 
class="bp">self</span><span class="o">.</span><span class="n">chunksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">slicesize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">blocksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">superblocksize</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">filters</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dirty</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">byteorder</span><span class="p">)</span> <span class="n">retstr</span> <span class="o">+=</span> <span class="s">"</span><span class="se">\n</span><span class="s"> sorted := </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">sorted</span> <span class="n">retstr</span> <span class="o">+=</span> <span class="s">"</span><span class="se">\n</span><span class="s"> indices := </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indices</span> <span class="n">retstr</span> <span class="o">+=</span> <span class="s">"</span><span class="se">\n</span><span class="s"> ranges := </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">ranges</span> <span class="n">retstr</span> <span class="o">+=</span> <span class="s">"</span><span class="se">\n</span><span class="s"> bounds := </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">bounds</span> <span class="n">retstr</span> <span 
class="o">+=</span> <span class="s">"</span><span class="se">\n</span><span class="s"> sortedLR := </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">sortedLR</span> <span class="n">retstr</span> <span class="o">+=</span> <span class="s">"</span><span class="se">\n</span><span class="s"> indicesLR := </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">indicesLR</span> <span class="k">return</span> <span class="n">retstr</span> </div> <span class="k">class</span> <span class="nc">IndexesDescG</span><span class="p">(</span><span class="n">NotLoggedMixin</span><span class="p">,</span> <span class="n">Group</span><span class="p">):</span> <span class="n">_c_classid</span> <span class="o">=</span> <span class="s">'DINDEX'</span> <span class="n">_c_classId</span> <span class="o">=</span> <span class="n">previous_api_property</span><span class="p">(</span><span class="s">'_c_classid'</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_g_width_warning</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> <span class="s">"the number of indexed columns on a single description group "</span> <span class="s">"is exceeding the recommended maximum (</span><span class="si">%d</span><span class="s">); "</span> <span class="s">"be ready to see PyTables asking for *lots* of memory "</span> <span class="s">"and possibly slow I/O"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_max_group_width</span><span class="p">,</span> <span class="n">PerformanceWarning</span><span class="p">)</span> <span class="n">_g_widthWarning</span> <span class="o">=</span> <span 
class="n">previous_api</span><span class="p">(</span><span class="n">_g_width_warning</span><span class="p">)</span> <span class="k">class</span> <span class="nc">IndexesTableG</span><span class="p">(</span><span class="n">NotLoggedMixin</span><span class="p">,</span> <span class="n">Group</span><span class="p">):</span> <span class="n">_c_classid</span> <span class="o">=</span> <span class="s">'TINDEX'</span> <span class="n">_c_classId</span> <span class="o">=</span> <span class="n">previous_api_property</span><span class="p">(</span><span class="s">'_c_classid'</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_getauto</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">if</span> <span class="s">'AUTO_INDEX'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="p">:</span> <span class="k">return</span> <span class="n">default_auto_index</span> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">AUTO_INDEX</span> <span class="k">def</span> <span class="nf">_setauto</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">auto</span><span class="p">):</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">AUTO_INDEX</span> <span class="o">=</span> <span class="nb">bool</span><span class="p">(</span><span class="n">auto</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_delauto</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_attrs</span><span class="o">.</span><span class="n">AUTO_INDEX</span> <span class="n">auto</span> 
<span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">_getauto</span><span class="p">,</span> <span class="n">_setauto</span><span class="p">,</span> <span class="n">_delauto</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_g_width_warning</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> <span class="s">"the number of indexed columns on a single table "</span> <span class="s">"is exceeding the recommended maximum (</span><span class="si">%d</span><span class="s">); "</span> <span class="s">"be ready to see PyTables asking for *lots* of memory "</span> <span class="s">"and possibly slow I/O"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_max_group_width</span><span class="p">,</span> <span class="n">PerformanceWarning</span><span class="p">)</span> <span class="n">_g_widthWarning</span> <span class="o">=</span> <span class="n">previous_api</span><span class="p">(</span><span class="n">_g_width_warning</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_g_check_name</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s">'_i_'</span><span class="p">):</span> <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> <span class="s">"names of index groups must start with ``_i_``: </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="n">name</span><span class="p">)</span> <span class="n">_g_checkName</span> <span class="o">=</span> <span 
class="n">previous_api</span><span class="p">(</span><span class="n">_g_check_name</span><span class="p">)</span> <span class="k">def</span> <span class="nf">_gettable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> <span class="n">names</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_pathname</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s">"/"</span><span class="p">)</span> <span class="n">tablename</span> <span class="o">=</span> <span class="n">names</span><span class="o">.</span><span class="n">pop</span><span class="p">()[</span><span class="mi">3</span><span class="p">:]</span> <span class="c"># "_i_" is at the beginning</span> <span class="n">parentpathname</span> <span class="o">=</span> <span class="s">"/"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">names</span><span class="p">)</span> <span class="n">tablepathname</span> <span class="o">=</span> <span class="n">join_path</span><span class="p">(</span><span class="n">parentpathname</span><span class="p">,</span> <span class="n">tablename</span><span class="p">)</span> <span class="n">table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_v_file</span><span class="o">.</span><span class="n">_get_node</span><span class="p">(</span><span class="n">tablepathname</span><span class="p">)</span> <span class="k">return</span> <span class="n">table</span> <span class="n">table</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span> <span class="n">_gettable</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">,</span> <span class="s">"Accessor for the `Table` object of this `IndexesTableG` container."</span><span class="p">)</span> <span class="k">class</span> <span 
class="nc">OldIndex</span><span class="p">(</span><span class="n">NotLoggedMixin</span><span class="p">,</span> <span class="n">Group</span><span class="p">):</span> <span class="sd">"""This is meant to hide indexes of PyTables 1.x files."""</span> <span class="n">_c_classid</span> <span class="o">=</span> <span class="s">'CINDEX'</span> <span class="n">_c_classId</span> <span class="o">=</span> <span class="n">previous_api_property</span><span class="p">(</span><span class="s">'_c_classid'</span><span class="p">)</span> <span class="c">## Local Variables:</span> <span class="c">## mode: python</span> <span class="c">## py-indent-offset: 4</span> <span class="c">## tab-width: 4</span> <span class="c">## fill-column: 72</span> <span class="c">## End:</span> </pre></div>
        </div>
      </div>
    </div>
    <!-- Sidebar: logo link back to the documentation index, followed by the quick-search form. -->
    <div class="sphinxsidebar">
      <div class="sphinxsidebarwrapper">
        <p class="logo"><a href="../../index.html">
          <img class="logo" src="../../_static/logo-pytables-small.png" alt="PyTables documentation home"/>
        </a></p>
        <!-- The search box starts hidden; the inline script right after it reveals it, so it is only shown when JavaScript runs. -->
        <div id="searchbox" style="display: none">
          <h3>Quick search</h3>
          <form class="search" action="../../search.html" method="get">
            <input type="text" name="q" aria-label="Search terms" />
            <input type="submit" value="Go" />
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>
          <p class="searchtip" style="font-size: 90%">
            Enter search terms or a module, class or function name.
          </p>
        </div>
        <script type="text/javascript">$('#searchbox').show(0);</script>
      </div>
    </div>
    <div class="clearer"></div>
  </div>
  <!-- Bottom navigation bar: index links on the right, breadcrumb trail on the left. -->
  <div class="relbar-bottom">
    <div class="related">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="../../genindex.html" title="General Index" >index</a></li>
        <li class="right" >
          <a href="../../py-modindex.html" title="Python Module Index" >modules</a> </li>
        <li class="right" >
          <a href="../../np-modindex.html" title="Python Module Index" >modules</a> </li>
        <li><a href="../../index.html">PyTables 3.0.0 documentation</a> »</li>
        <li><a href="../index.html" >Module code</a> »</li>
        <li><a href="../tables.html" >tables</a> »</li>
      </ul>
    </div>
  </div>
  <!-- Page footer: copyright notice and generator credit. -->
  <div class="footer">
    © Copyright 2011-2013, PyTables maintainers.
    Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 1.1.3.
  </div>
  <!-- cloud_sptheme 1.3 -->
</body>
</html>