<html> <head> <link rel="stylesheet" href="style.css" type="text/css"> <link rel="Start" href="index.html"> <link rel="previous" href="StdLabels.html"> <link rel="next" href="Stream.html"> <link rel="Up" href="index.html"> <link title="Index of types" rel=Appendix href="index_types.html"> <link title="Index of exceptions" rel=Appendix href="index_exceptions.html"> <link title="Index of values" rel=Appendix href="index_values.html"> <link title="Index of modules" rel=Appendix href="index_modules.html"> <link title="Index of module types" rel=Appendix href="index_module_types.html"> <link title="Arg" rel="Chapter" href="Arg.html"> <link title="Arith_status" rel="Chapter" href="Arith_status.html"> <link title="Array" rel="Chapter" href="Array.html"> <link title="ArrayLabels" rel="Chapter" href="ArrayLabels.html"> <link title="Big_int" rel="Chapter" href="Big_int.html"> <link title="Bigarray" rel="Chapter" href="Bigarray.html"> <link title="Buffer" rel="Chapter" href="Buffer.html"> <link title="Callback" rel="Chapter" href="Callback.html"> <link title="CamlinternalOO" rel="Chapter" href="CamlinternalOO.html"> <link title="Char" rel="Chapter" href="Char.html"> <link title="Complex" rel="Chapter" href="Complex.html"> <link title="Condition" rel="Chapter" href="Condition.html"> <link title="Dbm" rel="Chapter" href="Dbm.html"> <link title="Digest" rel="Chapter" href="Digest.html"> <link title="Dynlink" rel="Chapter" href="Dynlink.html"> <link title="Event" rel="Chapter" href="Event.html"> <link title="Filename" rel="Chapter" href="Filename.html"> <link title="Format" rel="Chapter" href="Format.html"> <link title="Gc" rel="Chapter" href="Gc.html"> <link title="Genlex" rel="Chapter" href="Genlex.html"> <link title="Graphics" rel="Chapter" href="Graphics.html"> <link title="GraphicsX11" rel="Chapter" href="GraphicsX11.html"> <link title="Hashtbl" rel="Chapter" href="Hashtbl.html"> <link title="Int32" rel="Chapter" href="Int32.html"> <link title="Int64" rel="Chapter" 
href="Int64.html"> <link title="Lazy" rel="Chapter" href="Lazy.html"> <link title="Lexing" rel="Chapter" href="Lexing.html"> <link title="List" rel="Chapter" href="List.html"> <link title="ListLabels" rel="Chapter" href="ListLabels.html"> <link title="Map" rel="Chapter" href="Map.html"> <link title="Marshal" rel="Chapter" href="Marshal.html"> <link title="MoreLabels" rel="Chapter" href="MoreLabels.html"> <link title="Mutex" rel="Chapter" href="Mutex.html"> <link title="Nativeint" rel="Chapter" href="Nativeint.html"> <link title="Num" rel="Chapter" href="Num.html"> <link title="Obj" rel="Chapter" href="Obj.html"> <link title="Oo" rel="Chapter" href="Oo.html"> <link title="Parsing" rel="Chapter" href="Parsing.html"> <link title="Pervasives" rel="Chapter" href="Pervasives.html"> <link title="Printexc" rel="Chapter" href="Printexc.html"> <link title="Printf" rel="Chapter" href="Printf.html"> <link title="Queue" rel="Chapter" href="Queue.html"> <link title="Random" rel="Chapter" href="Random.html"> <link title="Scanf" rel="Chapter" href="Scanf.html"> <link title="Set" rel="Chapter" href="Set.html"> <link title="Sort" rel="Chapter" href="Sort.html"> <link title="Stack" rel="Chapter" href="Stack.html"> <link title="StdLabels" rel="Chapter" href="StdLabels.html"> <link title="Str" rel="Chapter" href="Str.html"> <link title="Stream" rel="Chapter" href="Stream.html"> <link title="String" rel="Chapter" href="String.html"> <link title="StringLabels" rel="Chapter" href="StringLabels.html"> <link title="Sys" rel="Chapter" href="Sys.html"> <link title="Thread" rel="Chapter" href="Thread.html"> <link title="ThreadUnix" rel="Chapter" href="ThreadUnix.html"> <link title="Tk" rel="Chapter" href="Tk.html"> <link title="Unix" rel="Chapter" href="Unix.html"> <link title="UnixLabels" rel="Chapter" href="UnixLabels.html"> <link title="Weak" rel="Chapter" href="Weak.html"><link title="Regular expressions" rel="Section" href="#6_Regularexpressions"> <link title="String matching and 
searching" rel="Section" href="#6_Stringmatchingandsearching"> <link title="Replacement" rel="Section" href="#6_Replacement"> <link title="Splitting" rel="Section" href="#6_Splitting"> <link title="Extracting substrings" rel="Section" href="#6_Extractingsubstrings"> <title>Str</title> </head> <body> <div class="navbar"><a href="StdLabels.html">Previous</a> <a href="index.html">Up</a> <a href="Stream.html">Next</a> </div> <center><h1>Module <a href="type_Str.html">Str</a></h1></center> <br> <pre><span class="keyword">module</span> Str: <code class="type">sig end</code></pre><div class="info"> Regular expressions and high-level string processing<br> </div> <hr width="100%"> <br> <br> <a name="6_Regularexpressions"></a> <table cellpadding=5 cellspacing=5 width="100%"> <tr class="title6"><td><div align=center> <span class="title6">Regular expressions</span> </div> </td> </tr> </table> <br><br> <br><code><span class="keyword">type</span> <a name="TYPEregexp"></a>regexp </code> <div class="info"> The type of compiled regular expressions.<br> </div> <br> <pre><span class="keyword">val</span> <a name="VALregexp"></a>regexp : <code class="type">string -> <a href="Str.html#TYPEregexp">regexp</a></code></pre><div class="info"> Compile a regular expression. The syntax for regular expressions is the same as in Gnu Emacs. The special characters are <code class="code">$^.*+?[]</code>. The following constructs are recognized:<ul> <li><code class="code">. </code> matches any character except newline</li> <li><code class="code">* </code> (postfix) matches the previous expression zero, one or several times</li> <li><code class="code">+ </code> (postfix) matches the previous expression one or several times</li> <li><code class="code">? </code> (postfix) matches the previous expression once or not at all</li> <li><code class="code">[..] 
</code> character set; ranges are denoted with <code class="code">-</code>, as in <code class="code">[a-z]</code>; an initial <code class="code">^</code>, as in <code class="code">[^0-9]</code>, complements the set</li> <li><code class="code">^ </code> matches at beginning of line</li> <li><code class="code">$ </code> matches at end of line</li> <li><code class="code">\| </code> (infix) alternative between two expressions</li> <li><code class="code">\(..\)</code> grouping and naming of the enclosed expression</li> <li><code class="code">\1 </code> the text matched by the first <code class="code">\(...\)</code> expression (<code class="code">\2</code> for the second expression, etc)</li> <li><code class="code">\b </code> matches word boundaries</li> <li><code class="code">\ </code> quotes special characters.</li> </ul> <br> </div> <pre><span class="keyword">val</span> <a name="VALregexp_case_fold"></a>regexp_case_fold : <code class="type">string -> <a href="Str.html#TYPEregexp">regexp</a></code></pre><div class="info"> Same as <code class="code">regexp</code>, but the compiled expression will match text in a case-insensitive way: uppercase and lowercase letters will be considered equivalent.<br> </div> <pre><span class="keyword">val</span> <a name="VALquote"></a>quote : <code class="type">string -> string</code></pre><div class="info"> <code class="code"><span class="constructor">Str</span>.quote s</code> returns a regexp string that matches exactly <code class="code">s</code> and nothing else.<br> </div> <pre><span class="keyword">val</span> <a name="VALregexp_string"></a>regexp_string : <code class="type">string -> <a href="Str.html#TYPEregexp">regexp</a></code></pre><div class="info"> <code class="code"><span class="constructor">Str</span>.regexp_string s</code> returns a regular expression that matches exactly <code class="code">s</code> and nothing else.<br> </div> <pre><span class="keyword">val</span> <a 
name="VALregexp_string_case_fold"></a>regexp_string_case_fold : <code class="type">string -> <a href="Str.html#TYPEregexp">regexp</a></code></pre><div class="info"> <code class="code"><span class="constructor">Str</span>.regexp_string_case_fold</code> is similar to <a href="Str.html#VALregexp_string"><code class="code"><span class="constructor">Str</span>.regexp_string</code></a>, but the regexp matches in a case-insensitive way.<br> </div> <br> <br> <a name="6_Stringmatchingandsearching"></a> <table cellpadding=5 cellspacing=5 width="100%"> <tr class="title6"><td><div align=center> <span class="title6">String matching and searching</span> </div> </td> </tr> </table> <br><br> <pre><span class="keyword">val</span> <a name="VALstring_match"></a>string_match : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> bool</code></pre><div class="info"> <code class="code">string_match r s start</code> tests whether the characters in <code class="code">s</code> starting at position <code class="code">start</code> match the regular expression <code class="code">r</code>. The first character of a string has position <code class="code">0</code>, as usual.<br> </div> <pre><span class="keyword">val</span> <a name="VALsearch_forward"></a>search_forward : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> int</code></pre><div class="info"> <code class="code">search_forward r s start</code> searches the string <code class="code">s</code> for a substring matching the regular expression <code class="code">r</code>. The search starts at position <code class="code">start</code> and proceeds towards the end of the string. 
Return the position of the first character of the matched substring, or raise <code class="code"><span class="constructor">Not_found</span></code> if no substring matches.<br> </div> <pre><span class="keyword">val</span> <a name="VALsearch_backward"></a>search_backward : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> int</code></pre><div class="info"> Same as <a href="Str.html#VALsearch_forward"><code class="code"><span class="constructor">Str</span>.search_forward</code></a>, but the search proceeds towards the beginning of the string.<br> </div> <pre><span class="keyword">val</span> <a name="VALstring_partial_match"></a>string_partial_match : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> bool</code></pre><div class="info"> Similar to <a href="Str.html#VALstring_match"><code class="code"><span class="constructor">Str</span>.string_match</code></a>, but succeeds whenever the argument string is a prefix of a string that matches. This includes the case of a true complete match.<br> </div> <pre><span class="keyword">val</span> <a name="VALmatched_string"></a>matched_string : <code class="type">string -> string</code></pre><div class="info"> <code class="code">matched_string s</code> returns the substring of <code class="code">s</code> that was matched by the latest <a href="Str.html#VALstring_match"><code class="code"><span class="constructor">Str</span>.string_match</code></a>, <a href="Str.html#VALsearch_forward"><code class="code"><span class="constructor">Str</span>.search_forward</code></a> or <a href="Str.html#VALsearch_backward"><code class="code"><span class="constructor">Str</span>.search_backward</code></a>. 
The user must make sure that the parameter <code class="code">s</code> is the same string that was passed to the matching or searching function.<br> </div> <pre><span class="keyword">val</span> <a name="VALmatch_beginning"></a>match_beginning : <code class="type">unit -> int</code></pre><div class="info"> <code class="code">match_beginning()</code> returns the position of the first character of the substring that was matched by <a href="Str.html#VALstring_match"><code class="code"><span class="constructor">Str</span>.string_match</code></a>, <a href="Str.html#VALsearch_forward"><code class="code"><span class="constructor">Str</span>.search_forward</code></a> or <a href="Str.html#VALsearch_backward"><code class="code"><span class="constructor">Str</span>.search_backward</code></a>.<br> </div> <pre><span class="keyword">val</span> <a name="VALmatch_end"></a>match_end : <code class="type">unit -> int</code></pre><div class="info"> <code class="code">match_end()</code> returns the position of the character following the last character of the substring that was matched by <code class="code">string_match</code>, <code class="code">search_forward</code> or <code class="code">search_backward</code>.<br> </div> <pre><span class="keyword">val</span> <a name="VALmatched_group"></a>matched_group : <code class="type">int -> string -> string</code></pre><div class="info"> <code class="code">matched_group n s</code> returns the substring of <code class="code">s</code> that was matched by the <code class="code">n</code>th group <code class="code">\(...\)</code> of the regular expression during the latest <a href="Str.html#VALstring_match"><code class="code"><span class="constructor">Str</span>.string_match</code></a>, <a href="Str.html#VALsearch_forward"><code class="code"><span class="constructor">Str</span>.search_forward</code></a> or <a href="Str.html#VALsearch_backward"><code class="code"><span class="constructor">Str</span>.search_backward</code></a>. 
The user must make sure that the parameter <code class="code">s</code> is the same string that was passed to the matching or searching function. <code class="code">matched_group n s</code> raises <code class="code"><span class="constructor">Not_found</span></code> if the <code class="code">n</code>th group of the regular expression was not matched. This can happen with groups inside alternatives <code class="code">\|</code>, options <code class="code">?</code> or repetitions <code class="code">*</code>. For instance, the empty string will match <code class="code">\(a\)*</code>, but <code class="code">matched_group 1 <span class="string">""</span></code> will raise <code class="code"><span class="constructor">Not_found</span></code> because the first group itself was not matched.<br> </div> <pre><span class="keyword">val</span> <a name="VALgroup_beginning"></a>group_beginning : <code class="type">int -> int</code></pre><div class="info"> <code class="code">group_beginning n</code> returns the position of the first character of the substring that was matched by the <code class="code">n</code>th group of the regular expression.<br> <b>Raises</b> <code>Not_found</code> if the <code class="code">n</code>th group of the regular expression was not matched.<br> </div> <pre><span class="keyword">val</span> <a name="VALgroup_end"></a>group_end : <code class="type">int -> int</code></pre><div class="info"> <code class="code">group_end n</code> returns the position of the character following the last character of the substring that was matched by the <code class="code">n</code>th group of the regular expression.<br> <b>Raises</b> <code>Not_found</code> if the <code class="code">n</code>th group of the regular expression was not matched.<br> </div> <br> <br> <a name="6_Replacement"></a> <table cellpadding=5 cellspacing=5 width="100%"> <tr class="title6"><td><div align=center> <span class="title6">Replacement</span> </div> </td> </tr> </table> <br><br> <pre><span 
class="keyword">val</span> <a name="VALglobal_replace"></a>global_replace : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> string -> string</code></pre><div class="info"> <code class="code">global_replace regexp templ s</code> returns a string identical to <code class="code">s</code>, except that all substrings of <code class="code">s</code> that match <code class="code">regexp</code> have been replaced by <code class="code">templ</code>. The replacement template <code class="code">templ</code> can contain <code class="code">\1</code>, <code class="code">\2</code>, etc; these sequences will be replaced by the text matched by the corresponding group in the regular expression. <code class="code">\0</code> stands for the text matched by the whole regular expression.<br> </div> <pre><span class="keyword">val</span> <a name="VALreplace_first"></a>replace_first : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> string -> string</code></pre><div class="info"> Same as <a href="Str.html#VALglobal_replace"><code class="code"><span class="constructor">Str</span>.global_replace</code></a>, except that only the first substring matching the regular expression is replaced.<br> </div> <pre><span class="keyword">val</span> <a name="VALglobal_substitute"></a>global_substitute : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> (string -> string) -> string -> string</code></pre><div class="info"> <code class="code">global_substitute regexp subst s</code> returns a string identical to <code class="code">s</code>, except that all substrings of <code class="code">s</code> that match <code class="code">regexp</code> have been replaced by the result of function <code class="code">subst</code>. 
The function <code class="code">subst</code> is called once for each matching substring, and receives <code class="code">s</code> (the whole text) as argument.<br> </div> <pre><span class="keyword">val</span> <a name="VALsubstitute_first"></a>substitute_first : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> (string -> string) -> string -> string</code></pre><div class="info"> Same as <a href="Str.html#VALglobal_substitute"><code class="code"><span class="constructor">Str</span>.global_substitute</code></a>, except that only the first substring matching the regular expression is replaced.<br> </div> <pre><span class="keyword">val</span> <a name="VALreplace_matched"></a>replace_matched : <code class="type">string -> string -> string</code></pre><div class="info"> <code class="code">replace_matched repl s</code> returns the replacement text <code class="code">repl</code> in which <code class="code">\1</code>, <code class="code">\2</code>, etc. have been replaced by the text matched by the corresponding groups in the most recent matching operation. <code class="code">s</code> must be the same string that was matched during this matching operation.<br> </div> <br> <br> <a name="6_Splitting"></a> <table cellpadding=5 cellspacing=5 width="100%"> <tr class="title6"><td><div align=center> <span class="title6">Splitting</span> </div> </td> </tr> </table> <br><br> <pre><span class="keyword">val</span> <a name="VALsplit"></a>split : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> string list</code></pre><div class="info"> <code class="code">split r s</code> splits <code class="code">s</code> into substrings, taking as delimiters the substrings that match <code class="code">r</code>, and returns the list of substrings. For instance, <code class="code">split (regexp <span class="string">"[ \t]+"</span>) s</code> splits <code class="code">s</code> into blank-separated words. 
Occurrences of the delimiter at the beginning and at the end of the string are ignored.<br> </div> <pre><span class="keyword">val</span> <a name="VALbounded_split"></a>bounded_split : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> string list</code></pre><div class="info"> Same as <a href="Str.html#VALsplit"><code class="code"><span class="constructor">Str</span>.split</code></a>, but splits into at most <code class="code">n</code> substrings, where <code class="code">n</code> is the extra integer parameter.<br> </div> <pre><span class="keyword">val</span> <a name="VALsplit_delim"></a>split_delim : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> string list</code></pre><div class="info"> Same as <a href="Str.html#VALsplit"><code class="code"><span class="constructor">Str</span>.split</code></a> but occurrences of the delimiter at the beginning and at the end of the string are recognized and returned as empty strings in the result. For instance, <code class="code">split_delim (regexp <span class="string">" "</span>) <span class="string">" abc "</span></code> returns <code class="code">[<span class="string">""</span>; <span class="string">"abc"</span>; <span class="string">""</span>]</code>, while <code class="code">split</code> with the same arguments returns <code class="code">[<span class="string">"abc"</span>]</code>.<br> </div> <pre><span class="keyword">val</span> <a name="VALbounded_split_delim"></a>bounded_split_delim : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> string list</code></pre><div class="info"> Same as <a href="Str.html#VALbounded_split"><code class="code"><span class="constructor">Str</span>.bounded_split</code></a>, but occurrences of the delimiter at the beginning and at the end of the string are recognized and returned as empty strings in the result. 
For instance, <code class="code">split_delim (regexp <span class="string">" "</span>) <span class="string">" abc "</span></code> returns <code class="code">[<span class="string">""</span>; <span class="string">"abc"</span>; <span class="string">""</span>]</code>, while <code class="code">split</code> with the same arguments returns <code class="code">[<span class="string">"abc"</span>]</code>.<br> </div> <br><code><span class="keyword">type</span> <a name="TYPEsplit_result"></a>split_result =<br></code><table border="0" cellpadding="1"> <tr> <td align="left" valign="top" > <code><span class="keyword">|</span></code></td> <td align="left" valign="top" > <code><span class="constructor">Text</span> <span class="keyword">of</span> <code class="type">string</code></code></td> </tr> <tr> <td align="left" valign="top" > <code><span class="keyword">|</span></code></td> <td align="left" valign="top" > <code><span class="constructor">Delim</span> <span class="keyword">of</span> <code class="type">string</code></code></td> </tr></table> <br> <pre><span class="keyword">val</span> <a name="VALfull_split"></a>full_split : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> <a href="Str.html#TYPEsplit_result">split_result</a> list</code></pre><div class="info"> Same as <a href="Str.html#VALsplit_delim"><code class="code"><span class="constructor">Str</span>.split_delim</code></a>, but returns the delimiters as well as the substrings contained between delimiters. The former are tagged <code class="code"><span class="constructor">Delim</span></code> in the result list; the latter are tagged <code class="code"><span class="constructor">Text</span></code>. 
For instance, <code class="code">full_split (regexp <span class="string">"[{}]"</span>) <span class="string">"{ab}"</span></code> returns <code class="code">[<span class="constructor">Delim</span> <span class="string">"{"</span>; <span class="constructor">Text</span> <span class="string">"ab"</span>; <span class="constructor">Delim</span> <span class="string">"}"</span>]</code>.<br> </div> <pre><span class="keyword">val</span> <a name="VALbounded_full_split"></a>bounded_full_split : <code class="type"><a href="Str.html#TYPEregexp">regexp</a> -> string -> int -> <a href="Str.html#TYPEsplit_result">split_result</a> list</code></pre><div class="info"> Same as <a href="Str.html#VALbounded_split_delim"><code class="code"><span class="constructor">Str</span>.bounded_split_delim</code></a>, but returns the delimiters as well as the substrings contained between delimiters. The former are tagged <code class="code"><span class="constructor">Delim</span></code> in the result list; the latter are tagged <code class="code"><span class="constructor">Text</span></code>. 
For instance, <code class="code">full_split (regexp <span class="string">"[{}]"</span>) <span class="string">"{ab}"</span></code> returns <code class="code">[<span class="constructor">Delim</span> <span class="string">"{"</span>; <span class="constructor">Text</span> <span class="string">"ab"</span>; <span class="constructor">Delim</span> <span class="string">"}"</span>]</code>.<br> </div> <br> <br> <a name="6_Extractingsubstrings"></a> <table cellpadding=5 cellspacing=5 width="100%"> <tr class="title6"><td><div align=center> <span class="title6">Extracting substrings</span> </div> </td> </tr> </table> <br><br> <pre><span class="keyword">val</span> <a name="VALstring_before"></a>string_before : <code class="type">string -> int -> string</code></pre><div class="info"> <code class="code">string_before s n</code> returns the substring of all characters of <code class="code">s</code> that precede position <code class="code">n</code> (excluding the character at position <code class="code">n</code>).<br> </div> <pre><span class="keyword">val</span> <a name="VALstring_after"></a>string_after : <code class="type">string -> int -> string</code></pre><div class="info"> <code class="code">string_after s n</code> returns the substring of all characters of <code class="code">s</code> that follow position <code class="code">n</code> (including the character at position <code class="code">n</code>).<br> </div> <pre><span class="keyword">val</span> <a name="VALfirst_chars"></a>first_chars : <code class="type">string -> int -> string</code></pre><div class="info"> <code class="code">first_chars s n</code> returns the first <code class="code">n</code> characters of <code class="code">s</code>. 
This is the same function as <a href="Str.html#VALstring_before"><code class="code"><span class="constructor">Str</span>.string_before</code></a>.<br> </div> <pre><span class="keyword">val</span> <a name="VALlast_chars"></a>last_chars : <code class="type">string -> int -> string</code></pre><div class="info"> <code class="code">last_chars s n</code> returns the last <code class="code">n</code> characters of <code class="code">s</code>.<br> </div> </body></html>