Sophie

Sophie

distrib > Fedora > 15 > i386 > by-pkgid > 41ccc3b206824bb136a22fdbd1e55090 > files > 251

fontaine-0-3.svn39.fc15.i686.rpm

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<title>FONTAINE: Utf8String.h Source File</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<!-- Generated by Doxygen 1.7.3 -->
<div id="top">
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <td style="padding-left: 0.5em;">
   <div id="projectname">FONTAINE&#160;<span id="projectnumber">1.0</span></div>
  </td>
 </tr>
 </tbody>
</table>
</div>
  <div id="navrow1" class="tabs">
    <ul class="tablist">
      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
      <li><a href="namespaces.html"><span>Namespaces</span></a></li>
      <li><a href="annotated.html"><span>Classes</span></a></li>
      <li class="current"><a href="files.html"><span>Files</span></a></li>
    </ul>
  </div>
  <div id="navrow2" class="tabs2">
    <ul class="tablist">
      <li><a href="files.html"><span>File&#160;List</span></a></li>
      <li><a href="globals.html"><span>File&#160;Members</span></a></li>
    </ul>
  </div>
<div class="header">
  <div class="headertitle">
<h1>Utf8String.h</h1>  </div>
</div>
<div class="contents">
<a href="Utf8String_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">//</span>
<a name="l00002"></a>00002 <span class="comment">// The Fontaine Font Analysis Project </span>
<a name="l00003"></a>00003 <span class="comment">// </span>
<a name="l00004"></a>00004 <span class="comment">// Copyright (c) 2009 by Edward H. Trager</span>
<a name="l00005"></a>00005 <span class="comment">// All Rights Reserved</span>
<a name="l00006"></a>00006 <span class="comment">// </span>
<a name="l00007"></a>00007 <span class="comment">// Released under the GNU GPL version 2.0 or later.</span>
<a name="l00008"></a>00008 <span class="comment">//     </span>
<a name="l00009"></a>00009 
<a name="l00010"></a>00010 
<a name="l00012"></a>00012 <span class="comment">//</span>
<a name="l00013"></a>00013 <span class="comment">// This file was originally part of the MADELINE 2 program </span>
<a name="l00014"></a>00014 <span class="comment">// written by Edward H. Trager and Ritu Khanna</span>
<a name="l00015"></a>00015 <span class="comment">// Copyright (c) 2005 by the</span>
<a name="l00016"></a>00016 <span class="comment">// Regents of the University of Michigan.</span>
<a name="l00017"></a>00017 <span class="comment">// All Rights Reserved.</span>
<a name="l00018"></a>00018 <span class="comment">// Released under the GNU General Public License v. 2.0 or later.</span>
<a name="l00019"></a>00019 <span class="comment">// </span>
<a name="l00021"></a>00021 <span class="comment"></span><span class="comment">//</span>
<a name="l00022"></a>00022 <span class="comment">// utf8String.h</span>
<a name="l00023"></a>00023 <span class="comment">//</span>
<a name="l00024"></a>00024 <span class="comment">// (c) 2006 by Edward H. Trager</span>
<a name="l00025"></a>00025 <span class="comment">// released under the GNU General Public License</span>
<a name="l00026"></a>00026 <span class="comment">// </span>
<a name="l00027"></a>00027 <span class="comment">// This file was originally written for inclusion</span>
<a name="l00028"></a>00028 <span class="comment">// in &quot;Font Playground&quot; .</span>
<a name="l00029"></a>00029 <span class="comment">//</span>
<a name="l00030"></a>00030 <span class="comment">// 2006.04.30.et.</span>
<a name="l00031"></a>00031 <span class="comment">// LAST UPDATE: 2007.01.08</span>
<a name="l00032"></a>00032 <span class="comment">// </span>
<a name="l00033"></a>00033 
<a name="l00034"></a>00034 <span class="preprocessor">#ifndef UTF8STRING_INCLUDED</span>
<a name="l00035"></a>00035 <span class="preprocessor"></span><span class="preprocessor">#define UTF8STRING_INCLUDED</span>
<a name="l00036"></a>00036 <span class="preprocessor"></span>
<a name="l00037"></a>00037 <span class="preprocessor">#include &quot;<a class="code" href="ScriptCodes_8h.html">ScriptCodes.h</a>&quot;</span>
<a name="l00038"></a>00038 <span class="preprocessor">#include &lt;string&gt;</span>
<a name="l00039"></a>00039 
<a name="l00040"></a><a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">00040</a> <span class="keyword">typedef</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">long</span>  <a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">UTF32</a>; <span class="comment">// at least 32 bits</span>
<a name="l00041"></a><a class="code" href="Utf8String_8h.html#a50b71724cb39637be87babb9ddbd5822">00041</a> <span class="keyword">typedef</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">short</span> <a class="code" href="Utf8String_8h.html#a50b71724cb39637be87babb9ddbd5822">UTF16</a>; <span class="comment">// at least 16 bits</span>
<a name="l00042"></a><a class="code" href="Utf8String_8h.html#aafe543c40e242c865f970b57a29122ab">00042</a> <span class="keyword">typedef</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>  <a class="code" href="Utf8String_8h.html#aafe543c40e242c865f970b57a29122ab">UTF8</a>;
<a name="l00043"></a>00043 
<a name="l00044"></a><a class="code" href="Utf8String_8h.html#a44b240b95a93f71535c03f5e26d7dbe1">00044</a> <span class="preprocessor">#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD</span>
<a name="l00045"></a><a class="code" href="Utf8String_8h.html#a26c4390ae1463df1e6075ea585ed79a3">00045</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF</span>
<a name="l00046"></a>00046 <span class="preprocessor"></span>
<a name="l00047"></a>00047 <span class="comment">//</span>
<a name="l00048"></a>00048 <span class="comment">// The following are needed for UTF-16 conversion:</span>
<a name="l00049"></a>00049 <span class="comment">// </span>
<a name="l00050"></a><a class="code" href="Utf8String_8h.html#a4ab84eb26356a90f3b7b9ac7aca1edfe">00050</a> <span class="preprocessor">#define UNI_SUR_HIGH_START  (UTF32)0xD800</span>
<a name="l00051"></a><a class="code" href="Utf8String_8h.html#ae5481872f1061e4e2a66849802b4b81e">00051</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_SUR_HIGH_END    (UTF32)0xDBFF</span>
<a name="l00052"></a><a class="code" href="Utf8String_8h.html#a23de5862375b48afcb4e3ff7b56a274d">00052</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_SUR_LOW_START   (UTF32)0xDC00</span>
<a name="l00053"></a><a class="code" href="Utf8String_8h.html#ab9a531ffb73be79f7089049c1b84dc59">00053</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_SUR_LOW_END     (UTF32)0xDFFF</span>
<a name="l00054"></a>00054 <span class="preprocessor"></span>
<a name="l00055"></a><a class="code" href="classUTF8String.html">00055</a> <span class="keyword">class </span><a class="code" href="classUTF8String.html">UTF8String</a> : <span class="keyword">public</span> std::string {
<a name="l00056"></a>00056         
<a name="l00057"></a>00057         
<a name="l00058"></a>00058 <span class="keyword">private</span>:
<a name="l00059"></a>00059         
<a name="l00060"></a>00060         <span class="keyword">const</span> <span class="keywordtype">char</span> *<a class="code" href="classUTF8String.html#a31a90b1bcbbfbc3885a15a64dcbc5e16">_UTF32ValueToUTF8</a>( <a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">UTF32</a> UTF32Value );
<a name="l00061"></a>00061         
<a name="l00062"></a>00062 <span class="keyword">public</span>:
<a name="l00063"></a>00063         
<a name="l00064"></a>00064         <span class="comment">// Default constructor just calls the base class constructor std::string():</span>
<a name="l00065"></a>00065         <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>();
<a name="l00066"></a>00066         <span class="comment">// Constructors from a std::string and from another UTF8String (copy):</span>
<a name="l00067"></a>00067         <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>(<span class="keyword">const</span> std::string &amp;s);
<a name="l00068"></a>00068         <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>(<span class="keyword">const</span> <a class="code" href="classUTF8String.html">UTF8String</a> &amp;s);
<a name="l00069"></a>00069         <span class="comment">// How many Unicode values are stored in the string?:</span>
<a name="l00070"></a>00070         <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="classUTF8String.html#aa13bb70b043fe6c470685a7f2ff27a54">unicodeValueCount</a>() <span class="keyword">const</span>;
<a name="l00071"></a>00071         <span class="comment">// Get the Unicode substring starting at the &quot;stt&quot; unicode value --</span>
<a name="l00072"></a>00072         <span class="comment">// Note that stt=1 (*not* zero) returns the entire string:</span>
<a name="l00073"></a>00073         <a class="code" href="classUTF8String.html">UTF8String</a> <a class="code" href="classUTF8String.html#ac79b42cc365d5fce07331812be576e7b">unicodeSubString</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> stt,<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> howManyCharacters=0) <span class="keyword">const</span>;
<a name="l00074"></a>00074         <span class="comment">// Read-only bracket operator retrieves the nth unicode character --</span>
<a name="l00075"></a>00075         <span class="comment">// Note that pos=1 (*not* zero) specifies the first character:</span>
<a name="l00076"></a>00076         <a class="code" href="classUTF8String.html">UTF8String</a> <a class="code" href="classUTF8String.html#a189cda7f838b2dbdd5f92f17a5ae8444">operator[]</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> pos) <span class="keyword">const</span>;
<a name="l00077"></a>00077         <span class="comment">// Return the Unicode code value of the nth Unicode character:</span>
<a name="l00078"></a>00078         <a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">UTF32</a> <a class="code" href="classUTF8String.html#a2c7a362af792e813042335d6570f7669">unicodeValueAtPosition</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> pos=0) <span class="keyword">const</span>;
<a name="l00079"></a>00079         
<a name="l00080"></a>00080         <span class="comment">//</span>
<a name="l00081"></a>00081         <span class="comment">// Return a substring that is at most howManyCharacters characters</span>
<a name="l00082"></a>00082         <span class="comment">// long and whose end falls on a word boundary.</span>
<a name="l00083"></a>00083         <span class="comment">// </span>
<a name="l00084"></a>00084         <a class="code" href="classUTF8String.html">UTF8String</a> <a class="code" href="classUTF8String.html#a78fab2501f3ab2fc17caea6f85f58b3d">unicodeSubStringOnWordBoundary</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> stt,<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> howManyCharacters) <span class="keyword">const</span>;
<a name="l00085"></a>00085         
<a name="l00086"></a>00086         <span class="comment">// Returns boolean TRUE if the string begins with a character</span>
<a name="l00087"></a>00087         <span class="comment">// from a right-to-left script:</span>
<a name="l00088"></a>00088         <span class="keywordtype">bool</span> <a class="code" href="classUTF8String.html#a2d4df8a82057efe65a25981c58719d1a">isRTL</a>(<span class="keywordtype">void</span>) <span class="keyword">const</span>;
<a name="l00089"></a>00089         <span class="comment">// Returns a boolean TRUE if the string begins with a character</span>
<a name="l00090"></a>00090         <span class="comment">// from an Indic or Indic-derived script.  Such scripts have</span>
<a name="l00091"></a>00091         <span class="comment">// special complex text layout requirements:</span>
<a name="l00092"></a>00092         <span class="keywordtype">bool</span> <a class="code" href="classUTF8String.html#aa1b127af95c1f892642db7c215b916f6">isIndic</a>(<span class="keywordtype">void</span>) <span class="keyword">const</span>;
<a name="l00093"></a>00093         
<a name="l00094"></a>00094         <span class="comment">// Returns a boolean TRUE if the string begins with a character</span>
<a name="l00095"></a>00095         <span class="comment">// from the Arabic script.  This script has</span>
<a name="l00096"></a>00096         <span class="comment">// special complex text layout requirements:</span>
<a name="l00097"></a>00097         <span class="keywordtype">bool</span> <a class="code" href="classUTF8String.html#a100a35531e6987d708c6ca43a43a4e2b">isArabic</a>(<span class="keywordtype">void</span>) <span class="keyword">const</span>;
<a name="l00098"></a>00098         
<a name="l00099"></a>00099         <span class="comment">//</span>
<a name="l00100"></a>00100         <span class="comment">// Returns a script code based on the Unicode range of the first</span>
<a name="l00101"></a>00101         <span class="comment">// character in the string: Currently only handles the Arabic and</span>
<a name="l00102"></a>00102         <span class="comment">// Indic cases relevant for complex text layout</span>
<a name="l00103"></a>00103         <span class="comment">// </span>
<a name="l00104"></a>00104         <a class="code" href="ScriptCodes_8h.html#af6634f94f563da7b93f24bd70f4a2f24">SCRIPTCODE</a> <a class="code" href="classUTF8String.html#a0301a334d89aa2e978e3d339a4f8f137">getScriptCode</a>(<span class="keywordtype">void</span>);
<a name="l00105"></a>00105         
<a name="l00106"></a>00106         <span class="comment">// Returns a UTF32 String:</span>
<a name="l00107"></a>00107         std::basic_string&lt;UTF32&gt; <a class="code" href="classUTF8String.html#a91200be00a213d4610f6c1e54f62fd3e">UTF32String</a>() <span class="keyword">const</span>;
<a name="l00108"></a>00108         
<a name="l00109"></a>00109         <span class="comment">//</span>
<a name="l00110"></a>00110         <span class="comment">// Append and Derived Overloaded Assignment operators:</span>
<a name="l00111"></a>00111         <span class="comment">//</span>
<a name="l00112"></a>00112         <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#aec383aefa7cde5485d86cb2d957b6fba">append</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String );
<a name="l00113"></a>00113         <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#aec383aefa7cde5485d86cb2d957b6fba">append</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String );
<a name="l00114"></a>00114         
<a name="l00115"></a>00115         <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#a7ebe343bf47bfd2aef0abfd3c80e45c2">operator+=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String );
<a name="l00116"></a>00116         <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#a7ebe343bf47bfd2aef0abfd3c80e45c2">operator+=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String );
<a name="l00117"></a>00117         
<a name="l00118"></a>00118         <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#af6c81cef869390eee4e40d302e495052">operator=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String );
<a name="l00119"></a>00119         <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#af6c81cef869390eee4e40d302e495052">operator=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String );
<a name="l00120"></a>00120         
<a name="l00121"></a>00121         <span class="comment">// </span>
<a name="l00122"></a>00122         <span class="comment">// Specialized constructors:</span>
<a name="l00123"></a>00123         <span class="comment">// </span>
<a name="l00124"></a>00124         <span class="comment">// Construct a UTF8String from a UTF32 or UTF16 string:</span>
<a name="l00125"></a>00125         <span class="comment">// </span>
<a name="l00126"></a>00126         <span class="comment">// These also ultimately use the append() methods from above:</span>
<a name="l00127"></a>00127         <span class="comment">// </span>
<a name="l00128"></a>00128         <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String );
<a name="l00129"></a>00129         <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String );
<a name="l00130"></a>00130         
<a name="l00131"></a>00131 };
<a name="l00132"></a>00132 
<a name="l00133"></a>00133 <span class="preprocessor">#endif</span>
<a name="l00134"></a>00134 <span class="preprocessor"></span>
</pre></div></div>
</div>
<hr class="footer"/><address class="footer"><small>Generated on Tue Feb 8 2011 for FONTAINE by&#160;
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </small></address>
</body>
</html>