<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <link rel="stylesheet" href="style.css" type="text/css"> <meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type"> <link rel="Start" href="index.html"> <link rel="previous" href="Batteries.UChar.html"> <link rel="next" href="Batteries.Text.html"> <link rel="Up" href="Batteries.html"> <link title="Index of types" rel=Appendix href="index_types.html"> <link title="Index of exceptions" rel=Appendix href="index_exceptions.html"> <link title="Index of values" rel=Appendix href="index_values.html"> <link title="Index of class methods" rel=Appendix href="index_methods.html"> <link title="Index of classes" rel=Appendix href="index_classes.html"> <link title="Index of modules" rel=Appendix href="index_modules.html"> <link title="Index of module types" rel=Appendix href="index_module_types.html"> <link title="BatArray" rel="Chapter" href="BatArray.html"> <link title="BatAvlTree" rel="Chapter" href="BatAvlTree.html"> <link title="BatBase64" rel="Chapter" href="BatBase64.html"> <link title="BatBig_int" rel="Chapter" href="BatBig_int.html"> <link title="BatBigarray" rel="Chapter" href="BatBigarray.html"> <link title="BatBitSet" rel="Chapter" href="BatBitSet.html"> <link title="BatBool" rel="Chapter" href="BatBool.html"> <link title="BatBounded" rel="Chapter" href="BatBounded.html"> <link title="BatBuffer" rel="Chapter" href="BatBuffer.html"> <link title="BatCache" rel="Chapter" href="BatCache.html"> <link title="BatChar" rel="Chapter" href="BatChar.html"> <link title="BatCharParser" rel="Chapter" href="BatCharParser.html"> <link title="BatComplex" rel="Chapter" href="BatComplex.html"> <link title="BatConcurrent" rel="Chapter" href="BatConcurrent.html"> <link title="BatDeque" rel="Chapter" href="BatDeque.html"> <link title="BatDigest" rel="Chapter" href="BatDigest.html"> <link title="BatDllist" rel="Chapter" href="BatDllist.html"> <link title="BatDynArray" rel="Chapter" 
href="BatDynArray.html"> <link title="BatEnum" rel="Chapter" href="BatEnum.html"> <link title="BatFile" rel="Chapter" href="BatFile.html"> <link title="BatFingerTree" rel="Chapter" href="BatFingerTree.html"> <link title="BatFloat" rel="Chapter" href="BatFloat.html"> <link title="BatFormat" rel="Chapter" href="BatFormat.html"> <link title="BatGc" rel="Chapter" href="BatGc.html"> <link title="BatGenlex" rel="Chapter" href="BatGenlex.html"> <link title="BatGlobal" rel="Chapter" href="BatGlobal.html"> <link title="BatHashcons" rel="Chapter" href="BatHashcons.html"> <link title="BatHashtbl" rel="Chapter" href="BatHashtbl.html"> <link title="BatHeap" rel="Chapter" href="BatHeap.html"> <link title="BatIMap" rel="Chapter" href="BatIMap.html"> <link title="BatIO" rel="Chapter" href="BatIO.html"> <link title="BatISet" rel="Chapter" href="BatISet.html"> <link title="BatInnerIO" rel="Chapter" href="BatInnerIO.html"> <link title="BatInnerPervasives" rel="Chapter" href="BatInnerPervasives.html"> <link title="BatInnerWeaktbl" rel="Chapter" href="BatInnerWeaktbl.html"> <link title="BatInt" rel="Chapter" href="BatInt.html"> <link title="BatInt32" rel="Chapter" href="BatInt32.html"> <link title="BatInt64" rel="Chapter" href="BatInt64.html"> <link title="BatInterfaces" rel="Chapter" href="BatInterfaces.html"> <link title="BatLazyList" rel="Chapter" href="BatLazyList.html"> <link title="BatLexing" rel="Chapter" href="BatLexing.html"> <link title="BatList" rel="Chapter" href="BatList.html"> <link title="BatLog" rel="Chapter" href="BatLog.html"> <link title="BatLogger" rel="Chapter" href="BatLogger.html"> <link title="BatMap" rel="Chapter" href="BatMap.html"> <link title="BatMarshal" rel="Chapter" href="BatMarshal.html"> <link title="BatMultiMap" rel="Chapter" href="BatMultiMap.html"> <link title="BatMultiPMap" rel="Chapter" href="BatMultiPMap.html"> <link title="BatMutex" rel="Chapter" href="BatMutex.html"> <link title="BatNativeint" rel="Chapter" href="BatNativeint.html"> <link 
title="BatNum" rel="Chapter" href="BatNum.html"> <link title="BatNumber" rel="Chapter" href="BatNumber.html"> <link title="BatOo" rel="Chapter" href="BatOo.html"> <link title="BatOptParse" rel="Chapter" href="BatOptParse.html"> <link title="BatOption" rel="Chapter" href="BatOption.html"> <link title="BatOrd" rel="Chapter" href="BatOrd.html"> <link title="BatParserCo" rel="Chapter" href="BatParserCo.html"> <link title="BatPathGen" rel="Chapter" href="BatPathGen.html"> <link title="BatPervasives" rel="Chapter" href="BatPervasives.html"> <link title="BatPrintexc" rel="Chapter" href="BatPrintexc.html"> <link title="BatPrintf" rel="Chapter" href="BatPrintf.html"> <link title="BatQueue" rel="Chapter" href="BatQueue.html"> <link title="BatRMutex" rel="Chapter" href="BatRMutex.html"> <link title="BatRandom" rel="Chapter" href="BatRandom.html"> <link title="BatRef" rel="Chapter" href="BatRef.html"> <link title="BatRefList" rel="Chapter" href="BatRefList.html"> <link title="BatResult" rel="Chapter" href="BatResult.html"> <link title="BatReturn" rel="Chapter" href="BatReturn.html"> <link title="BatScanf" rel="Chapter" href="BatScanf.html"> <link title="BatSeq" rel="Chapter" href="BatSeq.html"> <link title="BatSet" rel="Chapter" href="BatSet.html"> <link title="BatSplay" rel="Chapter" href="BatSplay.html"> <link title="BatStack" rel="Chapter" href="BatStack.html"> <link title="BatStream" rel="Chapter" href="BatStream.html"> <link title="BatString" rel="Chapter" href="BatString.html"> <link title="BatSubstring" rel="Chapter" href="BatSubstring.html"> <link title="BatSys" rel="Chapter" href="BatSys.html"> <link title="BatText" rel="Chapter" href="BatText.html"> <link title="BatTuple" rel="Chapter" href="BatTuple.html"> <link title="BatUChar" rel="Chapter" href="BatUChar.html"> <link title="BatUTF8" rel="Chapter" href="BatUTF8.html"> <link title="BatUnit" rel="Chapter" href="BatUnit.html"> <link title="BatUnix" rel="Chapter" href="BatUnix.html"> <link title="BatUref" 
rel="Chapter" href="BatUref.html"> <link title="BatVect" rel="Chapter" href="BatVect.html"> <link title="Batteries" rel="Chapter" href="Batteries.html"> <link title="BatteriesConfig" rel="Chapter" href="BatteriesConfig.html"> <link title="BatteriesPrint" rel="Chapter" href="BatteriesPrint.html"> <link title="BatteriesThread" rel="Chapter" href="BatteriesThread.html"> <link title="Extlib" rel="Chapter" href="Extlib.html"><title>Batteries user guide : Batteries.UTF8</title> </head> <body> <div class="navbar"><a class="pre" href="Batteries.UChar.html" title="Batteries.UChar">Previous</a> <a class="up" href="Batteries.html" title="Batteries">Up</a> <a class="post" href="Batteries.Text.html" title="Batteries.Text">Next</a> </div> <h1>Module <a href="type_Batteries.UTF8.html">Batteries.UTF8</a></h1> <pre><span class="keyword">module</span> UTF8: <code class="type">BatUTF8</code></pre><hr width="100%"> <pre><span id="TYPEt"><span class="keyword">type</span> <code class="type"></code>t</span> = <code class="type">string</code> </pre> <div class="info "> UTF-8 encoded Unicode strings. The type is a normal string.<br> </div> <pre><span id="EXCEPTIONMalformed_code"><span class="keyword">exception</span> Malformed_code</span></pre> <pre><span id="VALvalidate"><span class="keyword">val</span> validate</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> unit</code></pre><div class="info "> <code class="code">validate s</code> succeeds if <code class="code">s</code> is valid UTF-8; otherwise it raises <code class="code">Malformed_code</code>. Other functions assume strings are valid UTF-8, so it is prudent to test their validity for strings from untrusted origins.<br> </div> <pre><span id="VALget"><span class="keyword">val</span> get</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> int -> <a href="BatUChar.html#TYPEt">BatUChar.t</a></code></pre><div class="info "> <code class="code">get s n</code> returns the <code class="code">n</code>-th Unicode character of <code class="code">s</code>. 
The call requires O(n) time.<br> </div> <pre><span id="VALinit"><span class="keyword">val</span> init</span> : <code class="type">int -> (int -> <a href="BatUChar.html#TYPEt">BatUChar.t</a>) -> <a href="BatUTF8.html#TYPEt">t</a></code></pre><div class="info "> <code class="code">init len f</code> returns a new string which contains <code class="code">len</code> Unicode characters. The i-th Unicode character is initialized by <code class="code">f i</code>.<br> </div> <pre><span id="VALlength"><span class="keyword">val</span> length</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> int</code></pre><div class="info "> <code class="code">length s</code> returns the number of Unicode characters contained in <code class="code">s</code>.<br> </div> <pre><span id="TYPEindex"><span class="keyword">type</span> <code class="type"></code>index</span> = <code class="type">int</code> </pre> <div class="info "> Positions in the string, represented by the number of bytes from the head. The location of the first character is <code class="code">0</code>.<br> </div> <pre><span id="VALnth"><span class="keyword">val</span> nth</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> int -> <a href="BatUTF8.html#TYPEindex">index</a></code></pre><div class="info "> <code class="code">nth s n</code> returns the position of the <code class="code">n</code>-th Unicode character. 
The call requires O(n) time.<br> </div> <pre><span id="VALfirst"><span class="keyword">val</span> first</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a></code></pre><div class="info "> The position of the head of the first Unicode character.<br> </div> <pre><span id="VALlast"><span class="keyword">val</span> last</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a></code></pre><div class="info "> The position of the head of the last Unicode character.<br> </div> <pre><span id="VALlook"><span class="keyword">val</span> look</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> <a href="BatUChar.html#TYPEt">BatUChar.t</a></code></pre><div class="info "> <code class="code">look s i</code> returns the Unicode character of the location <code class="code">i</code> in the string <code class="code">s</code>.<br> </div> <pre><span id="VALout_of_range"><span class="keyword">val</span> out_of_range</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> bool</code></pre><div class="info "> <code class="code">out_of_range s i</code> tests whether <code class="code">i</code> is a position outside of <code class="code">s</code>.<br> </div> <pre><span id="VALcompare_index"><span class="keyword">val</span> compare_index</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> int</code></pre><div class="info "> <code class="code">compare_index s i1 i2</code> returns a value &lt; 0 if <code class="code">i1</code> is the position located before <code class="code">i2</code>, 0 if <code class="code">i1</code> and <code class="code">i2</code> point to the same location, a value &gt; 0 if <code class="code">i1</code> is the position located after <code 
class="code">i2</code>.<br> </div> <pre><span id="VALnext"><span class="keyword">val</span> next</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> <a href="BatUTF8.html#TYPEindex">index</a></code></pre><div class="info "> <code class="code">next s i</code> returns the position of the head of the Unicode character located immediately after <code class="code">i</code>. If <code class="code">i</code> is inside of <code class="code">s</code>, the function always succeeds. If <code class="code">i</code> is inside of <code class="code">s</code> and there is no Unicode character after <code class="code">i</code>, a position outside <code class="code">s</code> is returned. If <code class="code">i</code> is not inside of <code class="code">s</code>, the behaviour is unspecified.<br> </div> <pre><span id="VALprev"><span class="keyword">val</span> prev</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> <a href="BatUTF8.html#TYPEindex">index</a></code></pre><div class="info "> <code class="code">prev s i</code> returns the position of the head of the Unicode character located immediately before <code class="code">i</code>. If <code class="code">i</code> is inside of <code class="code">s</code>, the function always succeeds. If <code class="code">i</code> is inside of <code class="code">s</code> and there is no Unicode character before <code class="code">i</code>, a position outside <code class="code">s</code> is returned. 
If <code class="code">i</code> is not inside of <code class="code">s</code>, the behaviour is unspecified.<br> </div> <pre><span id="VALmove"><span class="keyword">val</span> move</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEindex">index</a> -> int -> <a href="BatUTF8.html#TYPEindex">index</a></code></pre><div class="info "> <code class="code">move s i n</code> returns the position of the <code class="code">n</code>-th Unicode character after <code class="code">i</code> if n &gt;= 0, or of the <code class="code">n</code>-th Unicode character before <code class="code">i</code> if n &lt; 0. If there is no such character, the result is unspecified.<br> </div> <pre><span id="VALiter"><span class="keyword">val</span> iter</span> : <code class="type">(<a href="BatUChar.html#TYPEt">BatUChar.t</a> -> unit) -> <a href="BatUTF8.html#TYPEt">t</a> -> unit</code></pre><div class="info "> <code class="code">iter f s</code> applies <code class="code">f</code> to all Unicode characters in <code class="code">s</code>. The order of application is the same as the order of the Unicode characters in <code class="code">s</code>.<br> </div> <pre><span id="VALcompare"><span class="keyword">val</span> compare</span> : <code class="type"><a href="BatUTF8.html#TYPEt">t</a> -> <a href="BatUTF8.html#TYPEt">t</a> -> int</code></pre><div class="info "> Code point comparison by the lexicographic order. 
<code class="code">compare s1 s2</code> returns a positive integer if <code class="code">s1</code> &gt; <code class="code">s2</code>, 0 if <code class="code">s1</code> = <code class="code">s2</code>, a negative integer if <code class="code">s1</code> &lt; <code class="code">s2</code>.<br> </div> <pre><span class="keyword">module</span> <a href="BatUTF8.Buf.html">Buf</a>: <code class="code"><span class="keyword">sig</span></code> <a href="BatUTF8.Buf.html">..</a> <code class="code"><span class="keyword">end</span></code><code class="type"> with type buf = Buffer.t</code></pre><div class="info"> Buffer module for UTF-8 strings </div> </body></html>