<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/> <title>FONTAINE: Utf8String.h Source File</title> <link href="tabs.css" rel="stylesheet" type="text/css"/> <link href="doxygen.css" rel="stylesheet" type="text/css"/> </head> <body> <!-- Generated by Doxygen 1.7.3 --> <div id="top"> <div id="titlearea"> <table cellspacing="0" cellpadding="0"> <tbody> <tr style="height: 56px;"> <td style="padding-left: 0.5em;"> <div id="projectname">FONTAINE <span id="projectnumber">1.0</span></div> </td> </tr> </tbody> </table> </div> <div id="navrow1" class="tabs"> <ul class="tablist"> <li><a href="index.html"><span>Main Page</span></a></li> <li><a href="namespaces.html"><span>Namespaces</span></a></li> <li><a href="annotated.html"><span>Classes</span></a></li> <li class="current"><a href="files.html"><span>Files</span></a></li> </ul> </div> <div id="navrow2" class="tabs2"> <ul class="tablist"> <li><a href="files.html"><span>File List</span></a></li> <li><a href="globals.html"><span>File Members</span></a></li> </ul> </div> <div class="header"> <div class="headertitle"> <h1>Utf8String.h</h1> </div> </div> <div class="contents"> <a href="Utf8String_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">//</span> <a name="l00002"></a>00002 <span class="comment">// The Fontaine Font Analysis Project </span> <a name="l00003"></a>00003 <span class="comment">// </span> <a name="l00004"></a>00004 <span class="comment">// Copyright (c) 2009 by Edward H. 
Trager</span> <a name="l00005"></a>00005 <span class="comment">// All Rights Reserved</span> <a name="l00006"></a>00006 <span class="comment">// </span> <a name="l00007"></a>00007 <span class="comment">// Released under the GNU GPL version 2.0 or later.</span> <a name="l00008"></a>00008 <span class="comment">// </span> <a name="l00009"></a>00009 <a name="l00010"></a>00010 <a name="l00012"></a>00012 <span class="comment">//</span> <a name="l00013"></a>00013 <span class="comment">// This file was originally part of the MADELINE 2 program </span> <a name="l00014"></a>00014 <span class="comment">// written by Edward H. Trager and Ritu Khanna</span> <a name="l00015"></a>00015 <span class="comment">// Copyright (c) 2005 by the</span> <a name="l00016"></a>00016 <span class="comment">// Regents of the University of Michigan.</span> <a name="l00017"></a>00017 <span class="comment">// All Rights Reserved.</span> <a name="l00018"></a>00018 <span class="comment">// Released under the GNU General Public License v. 2.0 or later.</span> <a name="l00019"></a>00019 <span class="comment">// </span> <a name="l00021"></a>00021 <span class="comment"></span><span class="comment">//</span> <a name="l00022"></a>00022 <span class="comment">// utf8String.h</span> <a name="l00023"></a>00023 <span class="comment">//</span> <a name="l00024"></a>00024 <span class="comment">// (c) 2006 by Edward H. 
Trager</span> <a name="l00025"></a>00025 <span class="comment">// released under the GNU General Public License</span> <a name="l00026"></a>00026 <span class="comment">// </span> <a name="l00027"></a>00027 <span class="comment">// This file was originally written for inclusion</span> <a name="l00028"></a>00028 <span class="comment">// in "Font Playground" .</span> <a name="l00029"></a>00029 <span class="comment">//</span> <a name="l00030"></a>00030 <span class="comment">// 2006.04.30.et.</span> <a name="l00031"></a>00031 <span class="comment">// LAST UPDATE: 2007.01.08</span> <a name="l00032"></a>00032 <span class="comment">// </span> <a name="l00033"></a>00033 <a name="l00034"></a>00034 <span class="preprocessor">#ifndef UTF8STRING_INCLUDED</span> <a name="l00035"></a>00035 <span class="preprocessor"></span><span class="preprocessor">#define UTF8STRING_INCLUDED</span> <a name="l00036"></a>00036 <span class="preprocessor"></span> <a name="l00037"></a>00037 <span class="preprocessor">#include "<a class="code" href="ScriptCodes_8h.html">ScriptCodes.h</a>"</span> <a name="l00038"></a>00038 <span class="preprocessor">#include &lt;string&gt;</span> <a name="l00039"></a>00039 <a name="l00040"></a><a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">00040</a> <span class="keyword">typedef</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">long</span> <a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">UTF32</a>; <span class="comment">// at least 32 bits</span> <a name="l00041"></a><a class="code" href="Utf8String_8h.html#a50b71724cb39637be87babb9ddbd5822">00041</a> <span class="keyword">typedef</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">short</span> <a class="code" href="Utf8String_8h.html#a50b71724cb39637be87babb9ddbd5822">UTF16</a>; <span class="comment">// at least 16 bits</span> <a name="l00042"></a><a class="code" 
href="Utf8String_8h.html#aafe543c40e242c865f970b57a29122ab">00042</a> <span class="keyword">typedef</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> <a class="code" href="Utf8String_8h.html#aafe543c40e242c865f970b57a29122ab">UTF8</a>; <a name="l00043"></a>00043 <a name="l00044"></a><a class="code" href="Utf8String_8h.html#a44b240b95a93f71535c03f5e26d7dbe1">00044</a> <span class="preprocessor">#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD</span> <a name="l00045"></a><a class="code" href="Utf8String_8h.html#a26c4390ae1463df1e6075ea585ed79a3">00045</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF</span> <a name="l00046"></a>00046 <span class="preprocessor"></span> <a name="l00047"></a>00047 <span class="comment">//</span> <a name="l00048"></a>00048 <span class="comment">// The following are needed for UTF-16 conversion:</span> <a name="l00049"></a>00049 <span class="comment">// </span> <a name="l00050"></a><a class="code" href="Utf8String_8h.html#a4ab84eb26356a90f3b7b9ac7aca1edfe">00050</a> <span class="preprocessor">#define UNI_SUR_HIGH_START (UTF32)0xD800</span> <a name="l00051"></a><a class="code" href="Utf8String_8h.html#ae5481872f1061e4e2a66849802b4b81e">00051</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_SUR_HIGH_END (UTF32)0xDBFF</span> <a name="l00052"></a><a class="code" href="Utf8String_8h.html#a23de5862375b48afcb4e3ff7b56a274d">00052</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_SUR_LOW_START (UTF32)0xDC00</span> <a name="l00053"></a><a class="code" href="Utf8String_8h.html#ab9a531ffb73be79f7089049c1b84dc59">00053</a> <span class="preprocessor"></span><span class="preprocessor">#define UNI_SUR_LOW_END (UTF32)0xDFFF</span> <a name="l00054"></a>00054 <span class="preprocessor"></span> <a name="l00055"></a><a class="code" href="classUTF8String.html">00055</a> <span class="keyword">class </span><a 
class="code" href="classUTF8String.html">UTF8String</a> : <span class="keyword">public</span> std::string { <a name="l00056"></a>00056 <a name="l00057"></a>00057 <a name="l00058"></a>00058 <span class="keyword">private</span>: <a name="l00059"></a>00059 <a name="l00060"></a>00060 <span class="keyword">const</span> <span class="keywordtype">char</span> *<a class="code" href="classUTF8String.html#a31a90b1bcbbfbc3885a15a64dcbc5e16">_UTF32ValueToUTF8</a>( <a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">UTF32</a> UTF32Value ); <a name="l00061"></a>00061 <a name="l00062"></a>00062 <span class="keyword">public</span>: <a name="l00063"></a>00063 <a name="l00064"></a>00064 <span class="comment">// Default constructor just calls base class std::String():</span> <a name="l00065"></a>00065 <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>(); <a name="l00066"></a>00066 <span class="comment">// Copy Constructors:</span> <a name="l00067"></a>00067 <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>(<span class="keyword">const</span> std::string &amp;s); <a name="l00068"></a>00068 <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>(<span class="keyword">const</span> <a class="code" href="classUTF8String.html">UTF8String</a> &amp;s); <a name="l00069"></a>00069 <span class="comment">// How many Unicode values are stored in the string?:</span> <a name="l00070"></a>00070 <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="classUTF8String.html#aa13bb70b043fe6c470685a7f2ff27a54">unicodeValueCount</a>() <span class="keyword">const</span>; <a name="l00071"></a>00071 <span class="comment">// Get the Unicode substring starting at the "stt" unicode value --</span> <a name="l00072"></a>00072 <span class="comment">// Note that stt=1 (*not* zero) returns the entire string:</span> <a 
name="l00073"></a>00073 <a class="code" href="classUTF8String.html">UTF8String</a> <a class="code" href="classUTF8String.html#ac79b42cc365d5fce07331812be576e7b">unicodeSubString</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> stt,<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> howManyCharacters=0) <span class="keyword">const</span>; <a name="l00074"></a>00074 <span class="comment">// Read-only bracket operator retrieves the nth unicode character --</span> <a name="l00075"></a>00075 <span class="comment">// Note that pos=1 (*not* zero) specifies the first character:</span> <a name="l00076"></a>00076 <a class="code" href="classUTF8String.html">UTF8String</a> <a class="code" href="classUTF8String.html#a189cda7f838b2dbdd5f92f17a5ae8444">operator[]</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> pos) <span class="keyword">const</span>; <a name="l00077"></a>00077 <span class="comment">// Return the Unicode code value of the nth Unicode character:</span> <a name="l00078"></a>00078 <a class="code" href="Utf8String_8h.html#a0ae75062ae07deca9402a1fb245818b7">UTF32</a> <a class="code" href="classUTF8String.html#a2c7a362af792e813042335d6570f7669">unicodeValueAtPosition</a>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> pos=0) <span class="keyword">const</span>; <a name="l00079"></a>00079 <a name="l00080"></a>00080 <span class="comment">//</span> <a name="l00081"></a>00081 <span class="comment">// Return a substring less than or equal to the howManyCharacters in</span> <a name="l00082"></a>00082 <span class="comment">// length where the end of the string is on a word boundary.</span> <a name="l00083"></a>00083 <span class="comment">// </span> <a name="l00084"></a>00084 <a class="code" href="classUTF8String.html">UTF8String</a> <a class="code" href="classUTF8String.html#a78fab2501f3ab2fc17caea6f85f58b3d">unicodeSubStringOnWordBoundary</a>(<span 
class="keywordtype">unsigned</span> <span class="keywordtype">int</span> stt,<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> howManyCharacters) <span class="keyword">const</span>; <a name="l00085"></a>00085 <a name="l00086"></a>00086 <span class="comment">// Returns boolean TRUE if the string begins with a character</span> <a name="l00087"></a>00087 <span class="comment">// from a right-to-left script:</span> <a name="l00088"></a>00088 <span class="keywordtype">bool</span> <a class="code" href="classUTF8String.html#a2d4df8a82057efe65a25981c58719d1a">isRTL</a>(<span class="keywordtype">void</span>) <span class="keyword">const</span>; <a name="l00089"></a>00089 <span class="comment">// Returns a boolean TRUE if the string begins with a character</span> <a name="l00090"></a>00090 <span class="comment">// from an Indic or Indic-derived script. Such scripts have</span> <a name="l00091"></a>00091 <span class="comment">// special complex text layout requirements:</span> <a name="l00092"></a>00092 <span class="keywordtype">bool</span> <a class="code" href="classUTF8String.html#aa1b127af95c1f892642db7c215b916f6">isIndic</a>(<span class="keywordtype">void</span>) <span class="keyword">const</span>; <a name="l00093"></a>00093 <a name="l00094"></a>00094 <span class="comment">// Returns a boolean TRUE if the string begins with a character</span> <a name="l00095"></a>00095 <span class="comment">// from the Arabic script. 
This script has</span> <a name="l00096"></a>00096 <span class="comment">// special complex text layout requirements:</span> <a name="l00097"></a>00097 <span class="keywordtype">bool</span> <a class="code" href="classUTF8String.html#a100a35531e6987d708c6ca43a43a4e2b">isArabic</a>(<span class="keywordtype">void</span>) <span class="keyword">const</span>; <a name="l00098"></a>00098 <a name="l00099"></a>00099 <span class="comment">//</span> <a name="l00100"></a>00100 <span class="comment">// Returns a script code based on the Unicode range of the first</span> <a name="l00101"></a>00101 <span class="comment">// character in the string: Currently only handles the Arabic and</span> <a name="l00102"></a>00102 <span class="comment">// Indic cases relevant for complex text layout</span> <a name="l00103"></a>00103 <span class="comment">// </span> <a name="l00104"></a>00104 <a class="code" href="ScriptCodes_8h.html#af6634f94f563da7b93f24bd70f4a2f24">SCRIPTCODE</a> <a class="code" href="classUTF8String.html#a0301a334d89aa2e978e3d339a4f8f137">getScriptCode</a>(<span class="keywordtype">void</span>); <a name="l00105"></a>00105 <a name="l00106"></a>00106 <span class="comment">// Returns a UTF32 String:</span> <a name="l00107"></a>00107 std::basic_string&lt;UTF32&gt; <a class="code" href="classUTF8String.html#a91200be00a213d4610f6c1e54f62fd3e">UTF32String</a>() <span class="keyword">const</span>; <a name="l00108"></a>00108 <a name="l00109"></a>00109 <span class="comment">//</span> <a name="l00110"></a>00110 <span class="comment">// Append and Derived Overloaded Assignment operators:</span> <a name="l00111"></a>00111 <span class="comment">//</span> <a name="l00112"></a>00112 <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#aec383aefa7cde5485d86cb2d957b6fba">append</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String ); <a name="l00113"></a>00113 <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a 
class="code" href="classUTF8String.html#aec383aefa7cde5485d86cb2d957b6fba">append</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String ); <a name="l00114"></a>00114 <a name="l00115"></a>00115 <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#a7ebe343bf47bfd2aef0abfd3c80e45c2">operator+=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String ); <a name="l00116"></a>00116 <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#a7ebe343bf47bfd2aef0abfd3c80e45c2">operator+=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String ); <a name="l00117"></a>00117 <a name="l00118"></a>00118 <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#af6c81cef869390eee4e40d302e495052">operator=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF32&gt; &amp;UTF32String ); <a name="l00119"></a>00119 <a class="code" href="classUTF8String.html">UTF8String</a>&amp; <a class="code" href="classUTF8String.html#af6c81cef869390eee4e40d302e495052">operator=</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String ); <a name="l00120"></a>00120 <a name="l00121"></a>00121 <span class="comment">// </span> <a name="l00122"></a>00122 <span class="comment">// Specialized constructors:</span> <a name="l00123"></a>00123 <span class="comment">// </span> <a name="l00124"></a>00124 <span class="comment">// Construct a UTF8String from a UTF32 or UTF16 string:</span> <a name="l00125"></a>00125 <span class="comment">// </span> <a name="l00126"></a>00126 <span class="comment">// These also ultimately use the append() methods from above:</span> <a name="l00127"></a>00127 <span class="comment">// </span> <a name="l00128"></a>00128 <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>( <span class="keyword">const</span> 
std::basic_string&lt;UTF32&gt; &amp;UTF32String ); <a name="l00129"></a>00129 <a class="code" href="classUTF8String.html#aa203ebccb270106dabf265b785cbd01f">UTF8String</a>( <span class="keyword">const</span> std::basic_string&lt;UTF16&gt; &amp;UTF16String ); <a name="l00130"></a>00130 <a name="l00131"></a>00131 }; <a name="l00132"></a>00132 <a name="l00133"></a>00133 <span class="preprocessor">#endif</span> <a name="l00134"></a>00134 <span class="preprocessor"></span> </pre></div></div> </div> <hr class="footer"/><address class="footer"><small>Generated on Tue Feb 8 2011 for FONTAINE by  <a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </small></address> </body> </html>