Sophie

Sophie

distrib > * > cooker > x86_64 > by-pkgid > c82fbb12a1c61d087a9308adf47a46ab > files > 342

ocaml-ocamlnet-devel-2.2.9-10.x86_64.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type">
<link rel="Start" href="index.html">
<link rel="previous" href="Netaccel_link.html">
<link rel="next" href="Netstring_str.html">
<link rel="Up" href="index.html">
<link title="Index of types" rel=Appendix href="index_types.html">
<link title="Index of exceptions" rel=Appendix href="index_exceptions.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of class attributes" rel=Appendix href="index_attributes.html">
<link title="Index of class methods" rel=Appendix href="index_methods.html">
<link title="Index of classes" rel=Appendix href="index_classes.html">
<link title="Index of class types" rel=Appendix href="index_class_types.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="Index of module types" rel=Appendix href="index_module_types.html">
<link title="Uq_gtk" rel="Chapter" href="Uq_gtk.html">
<link title="Equeue" rel="Chapter" href="Equeue.html">
<link title="Unixqueue" rel="Chapter" href="Unixqueue.html">
<link title="Uq_engines" rel="Chapter" href="Uq_engines.html">
<link title="Uq_socks5" rel="Chapter" href="Uq_socks5.html">
<link title="Unixqueue_mt" rel="Chapter" href="Unixqueue_mt.html">
<link title="Equeue_intro" rel="Chapter" href="Equeue_intro.html">
<link title="Uq_ssl" rel="Chapter" href="Uq_ssl.html">
<link title="Uq_tcl" rel="Chapter" href="Uq_tcl.html">
<link title="Netcgi_common" rel="Chapter" href="Netcgi_common.html">
<link title="Netcgi" rel="Chapter" href="Netcgi.html">
<link title="Netcgi_ajp" rel="Chapter" href="Netcgi_ajp.html">
<link title="Netcgi_scgi" rel="Chapter" href="Netcgi_scgi.html">
<link title="Netcgi_cgi" rel="Chapter" href="Netcgi_cgi.html">
<link title="Netcgi_fcgi" rel="Chapter" href="Netcgi_fcgi.html">
<link title="Netcgi_dbi" rel="Chapter" href="Netcgi_dbi.html">
<link title="Netcgi1_compat" rel="Chapter" href="Netcgi1_compat.html">
<link title="Netcgi_test" rel="Chapter" href="Netcgi_test.html">
<link title="Netcgi_porting" rel="Chapter" href="Netcgi_porting.html">
<link title="Netcgi_plex" rel="Chapter" href="Netcgi_plex.html">
<link title="Http_client" rel="Chapter" href="Http_client.html">
<link title="Telnet_client" rel="Chapter" href="Telnet_client.html">
<link title="Ftp_data_endpoint" rel="Chapter" href="Ftp_data_endpoint.html">
<link title="Ftp_client" rel="Chapter" href="Ftp_client.html">
<link title="Nethttpd_types" rel="Chapter" href="Nethttpd_types.html">
<link title="Nethttpd_kernel" rel="Chapter" href="Nethttpd_kernel.html">
<link title="Nethttpd_reactor" rel="Chapter" href="Nethttpd_reactor.html">
<link title="Nethttpd_engine" rel="Chapter" href="Nethttpd_engine.html">
<link title="Nethttpd_services" rel="Chapter" href="Nethttpd_services.html">
<link title="Nethttpd_plex" rel="Chapter" href="Nethttpd_plex.html">
<link title="Nethttpd_intro" rel="Chapter" href="Nethttpd_intro.html">
<link title="Netplex_types" rel="Chapter" href="Netplex_types.html">
<link title="Netplex_mp" rel="Chapter" href="Netplex_mp.html">
<link title="Netplex_mt" rel="Chapter" href="Netplex_mt.html">
<link title="Netplex_log" rel="Chapter" href="Netplex_log.html">
<link title="Netplex_controller" rel="Chapter" href="Netplex_controller.html">
<link title="Netplex_container" rel="Chapter" href="Netplex_container.html">
<link title="Netplex_sockserv" rel="Chapter" href="Netplex_sockserv.html">
<link title="Netplex_workload" rel="Chapter" href="Netplex_workload.html">
<link title="Netplex_main" rel="Chapter" href="Netplex_main.html">
<link title="Netplex_config" rel="Chapter" href="Netplex_config.html">
<link title="Netplex_kit" rel="Chapter" href="Netplex_kit.html">
<link title="Rpc_netplex" rel="Chapter" href="Rpc_netplex.html">
<link title="Netplex_cenv" rel="Chapter" href="Netplex_cenv.html">
<link title="Netplex_intro" rel="Chapter" href="Netplex_intro.html">
<link title="Netshm" rel="Chapter" href="Netshm.html">
<link title="Netshm_data" rel="Chapter" href="Netshm_data.html">
<link title="Netshm_hashtbl" rel="Chapter" href="Netshm_hashtbl.html">
<link title="Netshm_array" rel="Chapter" href="Netshm_array.html">
<link title="Netshm_intro" rel="Chapter" href="Netshm_intro.html">
<link title="Netconversion" rel="Chapter" href="Netconversion.html">
<link title="Netchannels" rel="Chapter" href="Netchannels.html">
<link title="Netstream" rel="Chapter" href="Netstream.html">
<link title="Mimestring" rel="Chapter" href="Mimestring.html">
<link title="Netmime" rel="Chapter" href="Netmime.html">
<link title="Netsendmail" rel="Chapter" href="Netsendmail.html">
<link title="Neturl" rel="Chapter" href="Neturl.html">
<link title="Netaddress" rel="Chapter" href="Netaddress.html">
<link title="Netbuffer" rel="Chapter" href="Netbuffer.html">
<link title="Netdate" rel="Chapter" href="Netdate.html">
<link title="Netencoding" rel="Chapter" href="Netencoding.html">
<link title="Netulex" rel="Chapter" href="Netulex.html">
<link title="Netaccel" rel="Chapter" href="Netaccel.html">
<link title="Netaccel_link" rel="Chapter" href="Netaccel_link.html">
<link title="Nethtml" rel="Chapter" href="Nethtml.html">
<link title="Netstring_str" rel="Chapter" href="Netstring_str.html">
<link title="Netstring_pcre" rel="Chapter" href="Netstring_pcre.html">
<link title="Netstring_mt" rel="Chapter" href="Netstring_mt.html">
<link title="Netmappings" rel="Chapter" href="Netmappings.html">
<link title="Netaux" rel="Chapter" href="Netaux.html">
<link title="Nethttp" rel="Chapter" href="Nethttp.html">
<link title="Netchannels_tut" rel="Chapter" href="Netchannels_tut.html">
<link title="Netmime_tut" rel="Chapter" href="Netmime_tut.html">
<link title="Netsendmail_tut" rel="Chapter" href="Netsendmail_tut.html">
<link title="Netulex_tut" rel="Chapter" href="Netulex_tut.html">
<link title="Neturl_tut" rel="Chapter" href="Neturl_tut.html">
<link title="Netsys" rel="Chapter" href="Netsys.html">
<link title="Netpop" rel="Chapter" href="Netpop.html">
<link title="Rpc_auth_dh" rel="Chapter" href="Rpc_auth_dh.html">
<link title="Rpc_key_service" rel="Chapter" href="Rpc_key_service.html">
<link title="Rpc_time" rel="Chapter" href="Rpc_time.html">
<link title="Rpc_auth_local" rel="Chapter" href="Rpc_auth_local.html">
<link title="Rtypes" rel="Chapter" href="Rtypes.html">
<link title="Xdr" rel="Chapter" href="Xdr.html">
<link title="Rpc" rel="Chapter" href="Rpc.html">
<link title="Rpc_program" rel="Chapter" href="Rpc_program.html">
<link title="Rpc_portmapper_aux" rel="Chapter" href="Rpc_portmapper_aux.html">
<link title="Rpc_packer" rel="Chapter" href="Rpc_packer.html">
<link title="Rpc_transport" rel="Chapter" href="Rpc_transport.html">
<link title="Rpc_client" rel="Chapter" href="Rpc_client.html">
<link title="Rpc_simple_client" rel="Chapter" href="Rpc_simple_client.html">
<link title="Rpc_portmapper_clnt" rel="Chapter" href="Rpc_portmapper_clnt.html">
<link title="Rpc_portmapper" rel="Chapter" href="Rpc_portmapper.html">
<link title="Rpc_server" rel="Chapter" href="Rpc_server.html">
<link title="Rpc_auth_sys" rel="Chapter" href="Rpc_auth_sys.html">
<link title="Rpc_intro" rel="Chapter" href="Rpc_intro.html">
<link title="Rpc_mapping_ref" rel="Chapter" href="Rpc_mapping_ref.html">
<link title="Rpc_ssl" rel="Chapter" href="Rpc_ssl.html">
<link title="Rpc_xti_client" rel="Chapter" href="Rpc_xti_client.html">
<link title="Shell_sys" rel="Chapter" href="Shell_sys.html">
<link title="Shell" rel="Chapter" href="Shell.html">
<link title="Shell_uq" rel="Chapter" href="Shell_uq.html">
<link title="Shell_mt" rel="Chapter" href="Shell_mt.html">
<link title="Shell_intro" rel="Chapter" href="Shell_intro.html">
<link title="Netsmtp" rel="Chapter" href="Netsmtp.html"><title>Ocamlnet 2 Reference Manual : Nethtml</title>
</head>
<body>
<div class="navbar"><a href="Netaccel_link.html">Previous</a>
&nbsp;<a href="index.html">Up</a>
&nbsp;<a href="Netstring_str.html">Next</a>
</div>
<center><h1>Module <a href="type_Nethtml.html">Nethtml</a></h1></center>
<br>
<pre><span class="keyword">module</span> Nethtml: <code class="code">sig</code> <a href="Nethtml.html">..</a> <code class="code">end</code></pre>Parsing of HTML<br>
<hr width="100%">
<br><code><span class="keyword">type</span> <a name="TYPEdocument"></a><code class="type"></code>document = </code><table class="typetable">
<tr>
<td align="left" valign="top" >
<code><span class="keyword">|</span></code></td>
<td align="left" valign="top" >
<code><span class="constructor">Element</span> <span class="keyword">of</span> <code class="type">(string * (string * string) list * <a href="Nethtml.html#TYPEdocument">document</a> list)</code></code></td>

</tr>
<tr>
<td align="left" valign="top" >
<code><span class="keyword">|</span></code></td>
<td align="left" valign="top" >
<code><span class="constructor">Data</span> <span class="keyword">of</span> <code class="type">string</code></code></td>

</tr></table>

<div class="info">
The type <code class="code">document</code> represents parsed HTML documents:
<p>

 <ul>
<li><code class="code">Element (name, args, subnodes)</code> is an element node for an element of
   type <code class="code">name</code> (i.e. written <code class="code">&lt;name ...&gt;...&lt;/name&gt;</code>) with arguments <code class="code">args</code>
   and subnodes <code class="code">subnodes</code> (the material within the element). The arguments
   are simply name/value pairs. Entity references (something like <code class="code">&amp;xy;</code>)
   occurring in the values are <b>not</b> resolved.
<p>

   Arguments without values (e.g. <code class="code">&lt;select name="x" multiple&gt;</code>: here,
   <code class="code">multiple</code> is such an argument) are represented as <code class="code">(name,name)</code>, i.e. the
   name is also returned as value.
<p>

   As argument names are case-insensitive, the names are all lowercase.</li>
<li><code class="code">Data s</code> is a character data node. Again, entity references are contained
   as such and not as what they mean.</li>
</ul>

<p>

 Character encodings: The parser is restricted to ASCII-compatible
 encodings (see the function <a href="Netconversion.html#VALis_ascii_compatible"><code class="code">Netconversion.is_ascii_compatible</code></a> for
 a definition). In order to read other encodings, the text must be
 first recoded to an ASCII-compatible encoding (example below).
 Names of elements and attributes must additionally be ASCII-only.<br>
</div>

<br>
We also need a type that declares how to handle the various tags.
 This is called a "simplified DTD", as it is derived from SGML DTDs,
 but simplified to the extent used in the HTML definition.<br>
<pre><span class="keyword">type</span> <a name="TYPEelement_class"></a><code class="type"></code>element_class = <code class="type">[ `Block | `Essential_block | `Everywhere | `Inline | `None ]</code> </pre>
<div class="info">
Element classes are a property used in the HTML DTD. For our purposes,
 we define element classes simply as an enumeration:<ul>
<li><code class="code">`Inline</code> is the class of inline HTML elements</li>
<li><code class="code">`Block</code> is the class of block HTML elements</li>
<li><code class="code">`Essential_block</code> is a sub-class of <code class="code">`Block</code> with the additional
   property that every start tag must be explicitly ended</li>
<li><code class="code">`None</code> means that the members of the class are neither block nor
   inline elements, but have to be handled specially</li>
<li><code class="code">`Everywhere</code> means that the members of the class can occur everywhere, 
   regardless of whether a constraint allows it or not.</li>
</ul>
<br>
</div>

<pre><span class="keyword">type</span> <a name="TYPEmodel_constraint"></a><code class="type"></code>model_constraint = <code class="type">[ `Any<br>       | `Block<br>       | `Elements of string list<br>       | `Empty<br>       | `Except of <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a> * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a><br>       | `Flow<br>       | `Inline<br>       | `Or of <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a> * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a><br>       | `Special<br>       | `Sub_exclusions of string list * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a> ]</code> </pre>
<div class="info">
Model constraints define the possible sub elements of an element:<ul>
<li><code class="code">`Inline</code>: The sub elements must belong to the class <code class="code">`Inline</code></li>
<li><code class="code">`Block</code>: The sub elements must be members of the classes <code class="code">`Block</code> or 
   <code class="code">`Essential_block</code></li>
<li><code class="code">`Flow</code>: The sub elements must belong to the classes <code class="code">`Inline</code>, <code class="code">`Block</code>,
   or <code class="code">`Essential_block</code></li>
<li><code class="code">`Empty</code>: There are no sub elements</li>
<li><code class="code">`Any</code>: Any sub element is allowed</li>
<li><code class="code">`Special</code>: The element has special content (e.g. <code class="code">&lt;script&gt;</code>).
   Functionally equivalent to <code class="code">`Empty</code></li>
<li><code class="code">`Elements l</code>: Only these enumerated elements may occur as sub elements</li>
<li><code class="code">`Or(m1,m2)</code>: One of the constraints <code class="code">m1</code> or <code class="code">m2</code> must hold</li>
<li><code class="code">`Except(m1,m2)</code>: The constraint <code class="code">m1</code> must hold, and <code class="code">m2</code> must not hold</li>
<li><code class="code">`Sub_exclusions(l,m)</code>: The constraint <code class="code">m</code> must hold; furthermore, 
   the elements enumerated in list <code class="code">l</code> are not allowed as direct or
   indirect subelements, even if <code class="code">m</code> or the model of a subelement would
   allow them. The difference from <code class="code">`Except(m, `Elements l)</code> is that the
   exclusion is inherited by the subelements. The <code class="code">`Sub_exclusions</code>
   expression must be toplevel, i.e. it must not occur within an <code class="code">`Or</code>, 
   <code class="code">`Except</code>, or another <code class="code">`Sub_exclusions</code> expression.</li>
</ul>

 Note that the members of the class <code class="code">`Everywhere</code> are allowed everywhere,
 regardless of whether the model constraint allows them or not.
<p>

 Note that certain aspects are not modeled:<ul>
<li><code class="code">#PCDATA</code>: We do not specify where PCDATA is allowed and where not.</li>
<li>Order, Number: We specify neither the order in which the sub elements
   must occur nor how often they can occur</li>
<li>Inclusions: DTDs may declare that an element exceptionally
   allows a list of elements in all of its sub elements. </li>
<li>Optional tags: Whether start or end tags can be omitted (to some extent,
   this can be expressed with <code class="code">`Essential_block</code>, however)</li>
</ul>
<br>
</div>

<pre><span class="keyword">type</span> <a name="TYPEsimplified_dtd"></a><code class="type"></code>simplified_dtd = <code class="type">(string * (<a href="Nethtml.html#TYPEelement_class">element_class</a> * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a>)) list</code> </pre>
<div class="info">
A <code class="code">simplified_dtd</code> is an associative list of tuples
  <code class="code">(element_name, (element_class, constraint))</code>: For every <code class="code">element_name</code>
  it is declared that it is a member of <code class="code">element_class</code>, and that
  the sub elements must satisfy <code class="code">constraint</code>.
<p>

  It is not allowed to have several entries for the same element.<br>
</div>

<pre><span class="keyword">val</span> <a name="VALhtml40_dtd"></a>html40_dtd : <code class="type"><a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a></code></pre><div class="info">
The (transitional) HTML 4.0 DTD, expressed as <code class="code">simplified_dtd</code><br>
</div>
<pre><span class="keyword">val</span> <a name="VALrelaxed_html40_dtd"></a>relaxed_html40_dtd : <code class="type"><a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a></code></pre><div class="info">
A relaxed version of the HTML 4.0 DTD that better matches common
 practice. In particular, this DTD additionally allows inline
 elements to span blocks. For example, 
 <pre><code class="code"> &lt;B&gt;text1 &lt;P&gt;text2 </code></pre>
 is parsed as
 <pre><code class="code"> &lt;B&gt;text1 &lt;P&gt;text2&lt;/P&gt;&lt;/B&gt; </code></pre>
 and not as
 <pre><code class="code"> &lt;B&gt;text1 &lt;/B&gt;&lt;P&gt;text2&lt;/P&gt; </code></pre>
 - the latter is more correct (and parsed by <code class="code">html40_dtd</code>), but is not what
 users expect.
<p>

 Note that this is still not what many browsers implement. For example,
 Netscape treats most inline tags specially: <code class="code">&lt;B&gt;</code> switches bold on,
 <code class="code">&lt;/B&gt;</code> switches bold off. For example,
 <pre><code class="code"> &lt;A href='a'&gt;text1&lt;B&gt;text2&lt;A href='b'&gt;text3 </code></pre>
 is parsed as
 <pre><code class="code"> &lt;A href='a'&gt;text1&lt;B&gt;text2&lt;/B&gt;&lt;/A&gt;&lt;B&gt;&lt;A href='b'&gt;text3&lt;/A&gt;&lt;/B&gt; </code></pre>
 - there is an extra <code class="code">B</code> element around the second anchor! (You can
 see what Netscape parses by loading a page into the "Composer".)
 IMHO it is questionable to consider inline tags as switches because
 this is totally outside of the HTML specification, and browsers may
 differ in that point.
<p>

 Furthermore, several elements are turned into essential blocks:
 <code class="code">TABLE</code>, <code class="code">UL</code>, <code class="code">OL</code>, and <code class="code">DL</code>. David Fox reported a problem with structures
 like:
 <pre><code class="code"> &lt;TABLE&gt;&lt;TR&gt;&lt;TD&gt;&lt;TABLE&gt;&lt;TR&gt;&lt;TD&gt;x&lt;/TD&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TABLE&gt;y&lt;/TD&gt;&lt;/TR&gt;&lt;/TABLE&gt; </code></pre>
 i.e. the <code class="code">TD</code> of the inner table has two end tags. Without additional
 help, the second <code class="code">&lt;/TD&gt;</code> would close the outer table cell. Because of
 this problem, tables are now essential blocks, meaning that it is not allowed
 to implicitly add a missing <code class="code">&lt;/TABLE&gt;</code>; every table element has to
 be explicitly ended. This rule seems to be what many browsers implement.<br>
</div>
<pre><span class="keyword">val</span> <a name="VALparse_document"></a>parse_document : <code class="type">?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -><br>       ?return_declarations:bool -><br>       ?return_pis:bool -><br>       ?return_comments:bool -> Lexing.lexbuf -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Parses the HTML document from a <code class="code">lexbuf</code> and returns it. 
<p>

<br>
</div>
<div class="param_info"><code class="code">dtd</code> : specifies the DTD to use. By default, <code class="code">html40_dtd</code> is used, which
   is based on the transitional HTML 4.0 DTD</div>
<div class="param_info"><code class="code">return_declarations</code> : if set, the parser returns <code class="code">&lt;!...&gt;</code> declarations
   as <code class="code">Element("!",["contents",c],[])</code> nodes, where <code class="code">c</code> is the string inside
   <code class="code">&lt;!</code> and <code class="code">&gt;</code>. - By default, declarations are skipped.</div>
<div class="param_info"><code class="code">return_pis</code> : if set, the parser returns <code class="code">&lt;?...&gt;</code> (or <code class="code">&lt;?...?&gt;</code>) processing
   instructions as <code class="code">Element("?",["contents",c],[])</code> nodes, where <code class="code">c</code> is the
   string inside <code class="code">&lt;?</code> and <code class="code">&gt;</code> (or <code class="code">?&gt;</code>). - By default, processing instructions
   are skipped.</div>
<div class="param_info"><code class="code">return_comments</code> : if set, the parser returns <code class="code">&lt;!--</code> .... <code class="code">--&gt;</code> comments
   as <code class="code">Element("--",["contents",c],[])</code> nodes, where <code class="code">c</code> is the string inside
   <code class="code">&lt;!--</code> and <code class="code">--&gt;</code>. - By default, comments are skipped.</div>
<pre><span class="keyword">val</span> <a name="VALparse"></a>parse : <code class="type">?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -><br>       ?return_declarations:bool -><br>       ?return_pis:bool -><br>       ?return_comments:bool -> <a href="Netchannels.in_obj_channel.html">Netchannels.in_obj_channel</a> -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Parses the HTML document from an object channel and returns it.
 For example, to parse the HTML string <code class="code">s</code>:
 <pre><code class="code"> let ch = Netchannels.input_string s in
 let doc = parse ch
 </code></pre>
<p>

 Arguments are the same as in <code class="code">parse_document</code>.<br>
</div>
<br>
<b>Note on XHTML</b>
<p>

 The parser can read XHTML, as long as the following XML features are not
 used:<ul>
<li>Internal DTD subset, i.e. <code class="code">&lt;!DOCTYPE html ... [ ... ]&gt;</code></li>
<li>External entities</li>
<li><code class="code">&lt;![CDATA[</code></li>
<li><code class="code">&lt;![INCLUDE[</code></li>
<li><code class="code">&lt;![IGNORE[</code></li>
</ul>

 The following XML features are ok:<ul>
<li>Processing instructions</li>
<li>Empty elements (e.g. <code class="code">&lt;br/&gt;</code>) as long as the element is declared as 
   <code class="code">`Empty</code>.</li>
</ul>
<br>
<br>
<b>Note on Character Encodings</b>
<p>

 The parser can only read character streams that are encoded in an ASCII-
 compatible way. For example, it is possible to read a UTF-8-encoded
 stream, but not a UTF-16-encoded stream. All bytes between 1 and 127
 are taken as ASCII, and other bytes are not interpreted (they are
 copied unchanged from input to output).
<p>

 Non-ASCII-compatible streams must be recoded first. For example, to
 read a UTF-16-encoded netchannel <code class="code">ch</code>, use:
<p>

 <pre><code class="code"> let p = 
   new Netconversion.recoding_pipe ~in_enc:`Enc_utf16 ~out_enc:`Enc_utf8 () in
 let ch' =
   new Netchannels.input_filter ch p in
 let doc =
   Nethtml.parse ch' in
 ch' # close_in();
 ch # close_in();
 </code></pre><br>
<pre><span class="keyword">val</span> <a name="VALdecode"></a>decode : <code class="type">?enc:<a href="Netconversion.html#TYPEencoding">Netconversion.encoding</a> -><br>       ?subst:(int -> string) -><br>       ?entity_base:[ `Empty | `Html | `Xml ] -><br>       ?lookup:(string -> string) -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Converts entities <code class="code">&amp;name;</code> and <code class="code">&amp;#num;</code> into the corresponding 
 characters. The argument <code class="code">enc</code> must indicate the character set of
 the document (by default ISO-8859-1 for backwards compatibility).
 If a character cannot be represented in this encoding, the function
 <code class="code">subst</code> is called (input is the Unicode code point, output is the
 substituted string). By default, the function fails if such a 
 character is found.
<p>

 The arg <code class="code">entity_base</code> selects which entities can be converted
 (see <a href="Netencoding.Html.html#VALdecode"><code class="code">Netencoding.Html.decode</code></a>). The function <code class="code">lookup</code> is called
 for all unknown <code class="code">&amp;name;</code> entities. By default, this function fails.
<p>

 Note: Declarations, processing instructions, and comments are not
 decoded.<br>
</div>
<pre><span class="keyword">val</span> <a name="VALencode"></a>encode : <code class="type">?enc:<a href="Netconversion.html#TYPEencoding">Netconversion.encoding</a> -><br>       ?prefer_name:bool -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Converts problematic characters to their corresponding
 entities. The argument <code class="code">enc</code> must indicate the character set of
 the document (by default ISO-8859-1 for backwards compatibility).
 If <code class="code">prefer_name</code>, the algorithm tries to find the named entities
 (<code class="code">&amp;name;</code>); otherwise only numeric entities (<code class="code">&amp;#num;</code>) are generated.
 Names are preferred by default.
<p>

 Note: Declarations, processing instructions, and comments are not
 encoded.<br>
</div>
<pre><span class="keyword">val</span> <a name="VALmap_list"></a>map_list : <code class="type">(string -> string) -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
<code class="code">map_list f doclst</code>:
 Applies <code class="code">f</code> to all attribute values and data strings (except
 the attributes of "?", "!", or "--" nodes). 
<p>

 This can be used, for instance, to change the text encoding of a parsed document. As a simple illustration,
 <pre><code class="code"> let doc' = map_list String.lowercase doc
 </code></pre>
 converts all text data to lowercase characters.<br>
</div>
<pre><span class="keyword">val</span> <a name="VALwrite"></a>write : <code class="type">?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -><br>       <a href="Netchannels.out_obj_channel.html">Netchannels.out_obj_channel</a> -> <a href="Nethtml.html#TYPEdocument">document</a> list -> unit</code></pre><div class="info">
Writes the document to the output channel. No additional encoding or
 decoding happens.
<p>

 Empty elements are written without end tag; the rest is written 
 unabbreviated.
<p>

 Example: To write the document to a file:
 <pre><code class="code"> let f = open_out "filename" in
 let ch = new Netchannels.output_channel f in
 write ch doc;
 ch # close_out()
 </code></pre>
<p>

<br>
</div>
<div class="param_info"><code class="code">dtd</code> : The assumed simplified DTD, by default <code class="code">html40_dtd</code></div>
</body></html>