Sophie

Sophie

distrib > Fedora > 14 > x86_64 > media > updates > by-pkgid > 71d40963b505df4524269198e237b3e3 > files > 764

virtuoso-opensource-doc-6.1.4-2.fc14.noarch.rpm

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
 <head profile="http://internetalchemy.org/2003/02/profile">
  <!-- Encoding declaration comes first so it is seen before any other metadata.
       "text/html" replaces "text/xhtml", which is not a registered media type. -->
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  <!-- FOAF and Dublin Core metadata for this chapter -->
  <link rel="foaf" type="application/rdf+xml" title="FOAF" href="http://www.openlinksw.com/dataspace/uda/about.rdf" />
  <link rel="schema.dc" href="http://purl.org/dc/elements/1.1/" />
  <meta name="dc.title" content="18. Free Text Search" />
  <meta name="dc.subject" content="18. Free Text Search" />
  <meta name="dc.creator" content="OpenLink Software Documentation Team" />
  <meta name="dc.copyright" content="OpenLink Software, 1999 - 2009" />
  <!-- Document relationships used by browsers and crawlers for navigation -->
  <link rel="top" href="index.html" title="OpenLink Virtuoso Universal Server: Documentation" />
  <link rel="search" href="/doc/adv_search.vspx" title="Search OpenLink Virtuoso Universal Server: Documentation" />
  <link rel="parent" href="freetext.html" title="Chapter Contents" />
  <!-- NOTE(review): the on-page "Prev" links point to ldap.html, while this points to
       the previous chapter (internetservices.html) - confirm which target is intended. -->
  <link rel="prev" href="internetservices.html" title="Internet Services" />
  <link rel="next" href="txtidxquickstart.html" title="Basic Concepts" />
  <link rel="shortcut icon" href="../images/misc/favicon.ico" type="image/x-icon" />
  <link rel="stylesheet" type="text/css" href="doc.css" />
  <link rel="stylesheet" type="text/css" href="/doc/translation.css" />
  <title>18. Free Text Search</title>
  <meta name="author" content="OpenLink Software Documentation Team" />
  <meta name="copyright" content="OpenLink Software, 1999 - 2009" />
  <meta name="keywords" content="" />
  <meta name="GENERATOR" content="OpenLink XSLT Team" />
 </head>
 <body>
  <div id="header">
    <!-- Named anchor target for #freetext links. Closed explicitly: HTML parsers ignore
         the "/" on non-void elements, which would leave the anchor open around the heading. -->
    <a name="freetext"></a>
    <img src="../images/misc/logo.jpg" alt="" />
    <h1>18. Free Text Search</h1>
  </div>
  <div id="navbartop">
   <div>
      <a class="link" href="freetext.html">Chapter Contents</a> | <a class="link" href="ldap.html" title="LDAP">Prev</a> | <a class="link" href="txtidxquickstart.html" title="Basic Concepts">Next</a>
   </div>
  </div>
  <div id="currenttoc">
   <!-- Keyword search form; posts the query field "q" to the documentation search page -->
   <form method="post" action="/doc/adv_search.vspx">
    <!-- The label ties the visible caption to the query field for assistive technology -->
    <div class="search"><label for="q">Keyword Search:</label> <br />
        <input type="text" name="q" id="q" /> <input type="submit" name="go" value="Go" />
    </div>
   </form>
   <div>
      <a href="http://www.openlinksw.com/">www.openlinksw.com</a>
   </div>
   <div>
      <a href="http://docs.openlinksw.com/">docs.openlinksw.com</a>
   </div>
    <br />
   <div>
      <a href="index.html">Book Home</a>
   </div>
    <br />
   <div>
      <a href="contents.html">Contents</a>
   </div>
   <div>
      <a href="preface.html">Preface</a>
   </div>
    <br />
   <div>
      <a href="overview.html">Overview</a>
   </div>
   <div>
      <a href="installation.html">Installation Guide</a>
   </div>
   <div>
      <a href="quicktours.html">Quick Start &amp; Tours</a>
   </div>
   <div>
      <a href="sampleapps.html">Sample ODBC &amp; JDBC Applications</a>
   </div>
   <div>
      <a href="concepts.html">Conceptual Overview</a>
   </div>
   <div>
      <a href="server.html">Administration</a>
   </div>
   <div>
      <a href="accessinterfaces.html">Data Access Interfaces</a>
   </div>
   <div>
      <a href="sqlreference.html">SQL Reference</a>
   </div>
   <div>
      <a href="sqlprocedures.html">SQL Procedure Language Guide</a>
   </div>
   <div>
      <a href="hooks.html">Database Event Hooks</a>
   </div>
   <div>
      <a href="repl.html">Data Replication, Synchronization and Transformation Services</a>
   </div>
   <div>
      <a href="webappdevelopment.html">Web Application Development</a>
   </div>
   <div>
      <a href="webandxml.html">XML Support</a>
   </div>
   <div>
      <a href="rdfandsparql.html">RDF Data Access and Data Management</a>
   </div>
   <div>
      <a href="webservices.html">Web Services</a>
   </div>
   <div>
      <a href="runtimehosting.html">Runtime Hosting</a>
   </div>
   <div>
      <a href="internetservices.html">Internet Services</a>
   </div>
   <div class="selected">
      <a href="freetext.html">Free Text Search</a>
    <div>
        <a href="txtidxquickstart.html" title="Basic Concepts">Basic Concepts</a>
    </div>
    <div>
        <a href="creatingtxtidxs.html" title="Creating Free Text Indexes">Creating Free Text Indexes</a>
    </div>
    <div>
        <a href="queryingftcols.html" title="Querying Free Text Indexes">Querying Free Text Indexes</a>
    </div>
    <div>
        <a href="txttrig.html" title="Text Triggers">Text Triggers</a>
    </div>
    <div>
        <a href="tablesandinternals.html" title="Generated Tables and Internals">Generated Tables and Internals</a>
    </div>
    <div>
        <a href="droptxtindex.html" title="Removing A Text Index">Removing A Text Index</a>
    </div>
    <div>
        <a href="droptxttrig.html" title="Removing A Text Trigger">Removing A Text Trigger</a>
    </div>
    <div>
        <a href="ftinternationalization.html" title="Internationalization &amp; Unicode">Internationalization &amp; Unicode</a>
    </div>
    <div>
        <a href="ftperformance.html" title="Performance">Performance</a>
    </div>
    <div>
        <a href="fttfuncs.html" title="Free Text Functions">Free Text Functions</a>
    </div>
   </div>
   <div>
      <a href="tpcc.html">TPC C Benchmark Kit</a>
   </div>
   <div>
      <a href="xa.html">Using Virtuoso with Tuxedo</a>
   </div>
   <div>
      <a href="appendixa.html">Appendix</a>
   </div>
   <div>
      <a href="functions.html">Virtuoso Functions Guide</a>
   </div>
    <br />
  </div>
  <div id="text">
   <div class="abstract">
      <h2>Abstract</h2>
      <p>
Virtuoso provides a compact and efficient free text indexing capability
for text and XML data.  A free text index can be created on any character
column, including wide and long data.
	</p>
      <p>
The <a href="queryingftcols.html#containspredicate">contains</a> SQL predicate allows content
based retrieval of textual data.
This predicate takes a column and a text expression and is true if the
pattern of words in the text
expression occurs in the column value.  There must exist a previously
created text index of the column.  The text expression can contain single words
and phrases connected by boolean connectives or the proximity
operator.  Words can contain wildcards but must begin with at least three
non-wildcard characters if a wildcard is to be used.  While it is enough to
declare a free text index on a column and then just use the contains predicate
for many applications, Virtuoso offers a range of options for tailoring
how the indexing works.
	</p>
      <p>
If a certain application-specific order of search results is desired more
frequently than others, it is possible to specify a single or multipart key
that determines the order in which hits are returned from contains searches.  Both
ascending and descending orders of the key are supported.  To restart a search
in the middle it is possible to specify a starting and ending key value.  This
works if the results are generated in the order of the 
<a href="creatingtxtidxs.html#appspecificdocid">application specific doc ID</a>.
	</p>
      <p>
If non-text criteria are often used to filter or sort results of contains
searches, it is possible to cluster these non-text data inside the free text
index for faster retrieval.  It is often substantially faster to retrieve the
extra data from inside the text index than to get them from the row referenced
by the text index. Such data are called <a href="creatingtxtidxs.html#offbanddata">offband data</a>,
since they are not actually text but are stored similarly to text.
	</p>
      <p>
It is possible to pre-process the text before it is indexed or unindexed.
This feature can be used for data normalization
and/or for adding content from other than the primary text field being indexed
into the index.  One example is adding the names of all newsgroups where an
article appears to the index when indexing a news article.  Thus when retrieving
articles based on text and newsgroup, the group can be used to very efficiently
filter out the hits that are not in the group, even if the text indexed does
not itself contain the group name.  Another application of the same technique
is adding text from multiple columns into the same index.
	</p>
      <p>
If the column being indexed is XML data, this can be declared and enforced
by the text index.  XML data will be indexed specially to support efficient
XPATH predicate evaluation with the <a href="queryingxmldata.html#xcontainspredicate">xcontains</a> predicate.
	</p>
      <p>
<a href="txttrig.html">Text Triggers</a> is a feature that allows storing
a large body of free text queries and automatically generating hits
when documents matching the criteria are added to the index.  This is useful
for personalized data feeds, user profiles, content classification, etc.
Virtuoso can send the results in an email message.  The
conditions can be either free text expressions or XPATH expressions for XML content.
	</p>
      <p>
The text index can be kept synchronous with the data being indexed, so
that the index is updated in the same transaction as the data.  The other
possibility is to maintain the text index asynchronously as a scheduled task (batch mode),
which can execute up to an order of magnitude faster.  The asynchronous
mode of operation offers substantially higher performance if changes of multiple
entries are processed in one batch index refresh.
	</p>
   </div>
    <h2>Table of Contents</h2>
   <div class="minitoc">
    <div>
        <a class="sect1" href="txtidxquickstart.html">18.1. Basic Concepts</a>
    </div>
    <div>
        <a class="sect1" href="creatingtxtidxs.html">18.2. Creating Free Text Indexes</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#createtxtidxstmt">18.2.1. The CREATE TEXT INDEX statement</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#appspecificdocid">18.2.2. Choosing An Application Specific Document ID</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#compositedatatype">18.2.3. The composite Data Type</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#fttexamples">18.2.4. Free Text Index Examples</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#preprocessingandext">18.2.5. Pre-processing and Extending the Content Being Indexed</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#hitscores">18.2.6. Hit Scores</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#wordranges">18.2.7. Word Ranges</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#offbanddata">18.2.8. Using Offband Data for Faster Filtering</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#orderofhits">18.2.9. Order of Hits</a>
    </div>
    <div>
        <a class="sect2" href="creatingtxtidxs.html#noisewords">18.2.10. Noise Words</a>
    </div>
    <div>
        <a class="sect1" href="queryingftcols.html">18.3. Querying Free Text Indexes</a>
    </div>
    <div>
        <a class="sect2" href="queryingftcols.html#containspredicate">18.3.1. CONTAINS predicate</a>
    </div>
    <div>
        <a class="sect2" href="queryingftcols.html#fttcomments">18.3.2. Comments</a>
    </div>
    <div>
        <a class="sect2" href="queryingftcols.html#textexprsyntax">18.3.3. Text Expression Syntax</a>
    </div>
    <div>
        <a class="sect1" href="txttrig.html">18.4. Text Triggers</a>
    </div>
    <div>
        <a class="sect2" href="txttrig.html#createtxttrg">18.4.1. Creating Text Triggers</a>
    </div>
    <div>
        <a class="sect2" href="txttrig.html#createddbobjs">18.4.2. Created Database Objects</a>
    </div>
    <div>
        <a class="sect1" href="tablesandinternals.html">18.5. Generated Tables and Internals</a>
    </div>
    <div>
        <a class="sect2" href="tablesandinternals.html#gentabsaprocs">18.5.1. Generated Tables and Procedures </a>
    </div>
    <div>
        <a class="sect2" href="tablesandinternals.html#procs">18.5.2. The procedures are:</a>
    </div>
    <div>
        <a class="sect2" href="tablesandinternals.html#fttrigtblsandprocs">18.5.3. Tables and Procedures Created By  Text Triggers</a>
    </div>
    <div>
        <a class="sect1" href="droptxtindex.html">18.6. Removing A Text Index</a>
    </div>
    <div>
        <a class="sect1" href="droptxttrig.html">18.7. Removing A Text Trigger</a>
    </div>
    <div>
        <a class="sect1" href="ftinternationalization.html">18.8. Internationalization &amp; Unicode</a>
    </div>
    <div>
        <a class="sect1" href="ftperformance.html">18.9. Performance</a>
    </div>
    <div>
        <a class="sect2" href="ftperformance.html#restrictions">18.9.1. Restrictions</a>
    </div>
    <div>
        <a class="sect1" href="fttfuncs.html">18.10. Free Text Functions</a>
    </div>
   </div>
    <br />
   <table border="0" width="90%" id="navbarbottom">
    <tr>
        <td align="left" width="33%">
          <a href="ldap.html" title="LDAP">Previous</a>
          <br />LDAP</td>
     <td align="center" width="34%">
          <a href="freetext.html">Chapter Contents</a>
     </td>
        <td align="right" width="33%">
          <a href="txtidxquickstart.html" title="Basic Concepts">Next</a>
          <br />Basic Concepts</td>
    </tr>
   </table>
  </div>
  <div id="footer">
    <div>Copyright © 1999 - 2009 OpenLink Software. All rights reserved.</div>
   <div id="validation">
    <a href="http://validator.w3.org/check/referer">
        <img src="http://www.w3.org/Icons/valid-xhtml10" alt="Valid XHTML 1.0!" height="31" width="88" />
    </a>
    <a href="http://jigsaw.w3.org/css-validator/">
        <img src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!" height="31" width="88" />
    </a>
   </div>
  </div>
 </body>
</html>