Sophie

Sophie

distrib > Mageia > 4 > x86_64 > by-pkgid > 1abfe597bc89458ccaa645cd148862bb > files > 395

ocaml-ocamlnet-doc-3.7.3-3.mga4.noarch.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type">
<link rel="Start" href="index.html">
<link rel="previous" href="Netaccel_link.html">
<link rel="next" href="Netstring_str.html">
<link rel="Up" href="index.html">
<link title="Index of types" rel=Appendix href="index_types.html">
<link title="Index of exceptions" rel=Appendix href="index_exceptions.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of class attributes" rel=Appendix href="index_attributes.html">
<link title="Index of class methods" rel=Appendix href="index_methods.html">
<link title="Index of classes" rel=Appendix href="index_classes.html">
<link title="Index of class types" rel=Appendix href="index_class_types.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="Index of module types" rel=Appendix href="index_module_types.html">
<link title="Uq_gtk" rel="Chapter" href="Uq_gtk.html">
<link title="Equeue" rel="Chapter" href="Equeue.html">
<link title="Unixqueue" rel="Chapter" href="Unixqueue.html">
<link title="Unixqueue_pollset" rel="Chapter" href="Unixqueue_pollset.html">
<link title="Unixqueue_select" rel="Chapter" href="Unixqueue_select.html">
<link title="Uq_resolver" rel="Chapter" href="Uq_resolver.html">
<link title="Uq_engines" rel="Chapter" href="Uq_engines.html">
<link title="Uq_socks5" rel="Chapter" href="Uq_socks5.html">
<link title="Uq_io" rel="Chapter" href="Uq_io.html">
<link title="Uq_lwt" rel="Chapter" href="Uq_lwt.html">
<link title="Uq_libevent" rel="Chapter" href="Uq_libevent.html">
<link title="Uq_mt" rel="Chapter" href="Uq_mt.html">
<link title="Equeue_intro" rel="Chapter" href="Equeue_intro.html">
<link title="Equeue_howto" rel="Chapter" href="Equeue_howto.html">
<link title="Uq_ssl" rel="Chapter" href="Uq_ssl.html">
<link title="Https_client" rel="Chapter" href="Https_client.html">
<link title="Uq_tcl" rel="Chapter" href="Uq_tcl.html">
<link title="Netcamlbox" rel="Chapter" href="Netcamlbox.html">
<link title="Netcgi_apache" rel="Chapter" href="Netcgi_apache.html">
<link title="Netcgi_modtpl" rel="Chapter" href="Netcgi_modtpl.html">
<link title="Netcgi_common" rel="Chapter" href="Netcgi_common.html">
<link title="Netcgi" rel="Chapter" href="Netcgi.html">
<link title="Netcgi_ajp" rel="Chapter" href="Netcgi_ajp.html">
<link title="Netcgi_scgi" rel="Chapter" href="Netcgi_scgi.html">
<link title="Netcgi_cgi" rel="Chapter" href="Netcgi_cgi.html">
<link title="Netcgi_fcgi" rel="Chapter" href="Netcgi_fcgi.html">
<link title="Netcgi_dbi" rel="Chapter" href="Netcgi_dbi.html">
<link title="Netcgi1_compat" rel="Chapter" href="Netcgi1_compat.html">
<link title="Netcgi_test" rel="Chapter" href="Netcgi_test.html">
<link title="Netcgi_porting" rel="Chapter" href="Netcgi_porting.html">
<link title="Netcgi_plex" rel="Chapter" href="Netcgi_plex.html">
<link title="Http_client_conncache" rel="Chapter" href="Http_client_conncache.html">
<link title="Http_client" rel="Chapter" href="Http_client.html">
<link title="Telnet_client" rel="Chapter" href="Telnet_client.html">
<link title="Ftp_data_endpoint" rel="Chapter" href="Ftp_data_endpoint.html">
<link title="Ftp_client" rel="Chapter" href="Ftp_client.html">
<link title="Http_fs" rel="Chapter" href="Http_fs.html">
<link title="Ftp_fs" rel="Chapter" href="Ftp_fs.html">
<link title="Netclient_tut" rel="Chapter" href="Netclient_tut.html">
<link title="Netgssapi" rel="Chapter" href="Netgssapi.html">
<link title="Nethttpd_types" rel="Chapter" href="Nethttpd_types.html">
<link title="Nethttpd_kernel" rel="Chapter" href="Nethttpd_kernel.html">
<link title="Nethttpd_reactor" rel="Chapter" href="Nethttpd_reactor.html">
<link title="Nethttpd_engine" rel="Chapter" href="Nethttpd_engine.html">
<link title="Nethttpd_services" rel="Chapter" href="Nethttpd_services.html">
<link title="Nethttpd_plex" rel="Chapter" href="Nethttpd_plex.html">
<link title="Nethttpd_util" rel="Chapter" href="Nethttpd_util.html">
<link title="Nethttpd_intro" rel="Chapter" href="Nethttpd_intro.html">
<link title="Netmech_scram" rel="Chapter" href="Netmech_scram.html">
<link title="Netmech_scram_gssapi" rel="Chapter" href="Netmech_scram_gssapi.html">
<link title="Netmcore" rel="Chapter" href="Netmcore.html">
<link title="Netmcore_camlbox" rel="Chapter" href="Netmcore_camlbox.html">
<link title="Netmcore_mempool" rel="Chapter" href="Netmcore_mempool.html">
<link title="Netmcore_heap" rel="Chapter" href="Netmcore_heap.html">
<link title="Netmcore_ref" rel="Chapter" href="Netmcore_ref.html">
<link title="Netmcore_array" rel="Chapter" href="Netmcore_array.html">
<link title="Netmcore_sem" rel="Chapter" href="Netmcore_sem.html">
<link title="Netmcore_mutex" rel="Chapter" href="Netmcore_mutex.html">
<link title="Netmcore_condition" rel="Chapter" href="Netmcore_condition.html">
<link title="Netmcore_queue" rel="Chapter" href="Netmcore_queue.html">
<link title="Netmcore_buffer" rel="Chapter" href="Netmcore_buffer.html">
<link title="Netmcore_matrix" rel="Chapter" href="Netmcore_matrix.html">
<link title="Netmcore_hashtbl" rel="Chapter" href="Netmcore_hashtbl.html">
<link title="Netmcore_process" rel="Chapter" href="Netmcore_process.html">
<link title="Netmcore_tut" rel="Chapter" href="Netmcore_tut.html">
<link title="Netmcore_basics" rel="Chapter" href="Netmcore_basics.html">
<link title="Netplex_types" rel="Chapter" href="Netplex_types.html">
<link title="Netplex_mp" rel="Chapter" href="Netplex_mp.html">
<link title="Netplex_mt" rel="Chapter" href="Netplex_mt.html">
<link title="Netplex_log" rel="Chapter" href="Netplex_log.html">
<link title="Netplex_controller" rel="Chapter" href="Netplex_controller.html">
<link title="Netplex_container" rel="Chapter" href="Netplex_container.html">
<link title="Netplex_sockserv" rel="Chapter" href="Netplex_sockserv.html">
<link title="Netplex_workload" rel="Chapter" href="Netplex_workload.html">
<link title="Netplex_main" rel="Chapter" href="Netplex_main.html">
<link title="Netplex_config" rel="Chapter" href="Netplex_config.html">
<link title="Netplex_kit" rel="Chapter" href="Netplex_kit.html">
<link title="Rpc_netplex" rel="Chapter" href="Rpc_netplex.html">
<link title="Netplex_cenv" rel="Chapter" href="Netplex_cenv.html">
<link title="Netplex_semaphore" rel="Chapter" href="Netplex_semaphore.html">
<link title="Netplex_sharedvar" rel="Chapter" href="Netplex_sharedvar.html">
<link title="Netplex_mutex" rel="Chapter" href="Netplex_mutex.html">
<link title="Netplex_encap" rel="Chapter" href="Netplex_encap.html">
<link title="Netplex_mbox" rel="Chapter" href="Netplex_mbox.html">
<link title="Netplex_intro" rel="Chapter" href="Netplex_intro.html">
<link title="Netplex_advanced" rel="Chapter" href="Netplex_advanced.html">
<link title="Netplex_admin" rel="Chapter" href="Netplex_admin.html">
<link title="Netshm" rel="Chapter" href="Netshm.html">
<link title="Netshm_data" rel="Chapter" href="Netshm_data.html">
<link title="Netshm_hashtbl" rel="Chapter" href="Netshm_hashtbl.html">
<link title="Netshm_array" rel="Chapter" href="Netshm_array.html">
<link title="Netshm_intro" rel="Chapter" href="Netshm_intro.html">
<link title="Netconversion" rel="Chapter" href="Netconversion.html">
<link title="Netchannels" rel="Chapter" href="Netchannels.html">
<link title="Netstream" rel="Chapter" href="Netstream.html">
<link title="Mimestring" rel="Chapter" href="Mimestring.html">
<link title="Netmime" rel="Chapter" href="Netmime.html">
<link title="Netsendmail" rel="Chapter" href="Netsendmail.html">
<link title="Neturl" rel="Chapter" href="Neturl.html">
<link title="Netaddress" rel="Chapter" href="Netaddress.html">
<link title="Netbuffer" rel="Chapter" href="Netbuffer.html">
<link title="Netdate" rel="Chapter" href="Netdate.html">
<link title="Netencoding" rel="Chapter" href="Netencoding.html">
<link title="Netulex" rel="Chapter" href="Netulex.html">
<link title="Netaccel" rel="Chapter" href="Netaccel.html">
<link title="Netaccel_link" rel="Chapter" href="Netaccel_link.html">
<link title="Nethtml" rel="Chapter" href="Nethtml.html">
<link title="Netstring_str" rel="Chapter" href="Netstring_str.html">
<link title="Netmappings" rel="Chapter" href="Netmappings.html">
<link title="Netaux" rel="Chapter" href="Netaux.html">
<link title="Nethttp" rel="Chapter" href="Nethttp.html">
<link title="Netpagebuffer" rel="Chapter" href="Netpagebuffer.html">
<link title="Netfs" rel="Chapter" href="Netfs.html">
<link title="Netglob" rel="Chapter" href="Netglob.html">
<link title="Netauth" rel="Chapter" href="Netauth.html">
<link title="Netsockaddr" rel="Chapter" href="Netsockaddr.html">
<link title="Netnumber" rel="Chapter" href="Netnumber.html">
<link title="Rtypes" rel="Chapter" href="Rtypes.html">
<link title="Xdr_mstring" rel="Chapter" href="Xdr_mstring.html">
<link title="Xdr" rel="Chapter" href="Xdr.html">
<link title="Netcompression" rel="Chapter" href="Netcompression.html">
<link title="Netunichar" rel="Chapter" href="Netunichar.html">
<link title="Netchannels_tut" rel="Chapter" href="Netchannels_tut.html">
<link title="Netmime_tut" rel="Chapter" href="Netmime_tut.html">
<link title="Netsendmail_tut" rel="Chapter" href="Netsendmail_tut.html">
<link title="Netulex_tut" rel="Chapter" href="Netulex_tut.html">
<link title="Neturl_tut" rel="Chapter" href="Neturl_tut.html">
<link title="Netstring_pcre" rel="Chapter" href="Netstring_pcre.html">
<link title="Netsys" rel="Chapter" href="Netsys.html">
<link title="Netsys_posix" rel="Chapter" href="Netsys_posix.html">
<link title="Netsys_pollset" rel="Chapter" href="Netsys_pollset.html">
<link title="Netlog" rel="Chapter" href="Netlog.html">
<link title="Netexn" rel="Chapter" href="Netexn.html">
<link title="Netsys_win32" rel="Chapter" href="Netsys_win32.html">
<link title="Netsys_pollset_posix" rel="Chapter" href="Netsys_pollset_posix.html">
<link title="Netsys_pollset_win32" rel="Chapter" href="Netsys_pollset_win32.html">
<link title="Netsys_pollset_generic" rel="Chapter" href="Netsys_pollset_generic.html">
<link title="Netsys_signal" rel="Chapter" href="Netsys_signal.html">
<link title="Netsys_oothr" rel="Chapter" href="Netsys_oothr.html">
<link title="Netsys_xdr" rel="Chapter" href="Netsys_xdr.html">
<link title="Netsys_rng" rel="Chapter" href="Netsys_rng.html">
<link title="Netsys_types" rel="Chapter" href="Netsys_types.html">
<link title="Netsys_mem" rel="Chapter" href="Netsys_mem.html">
<link title="Netsys_tmp" rel="Chapter" href="Netsys_tmp.html">
<link title="Netsys_sem" rel="Chapter" href="Netsys_sem.html">
<link title="Netsys_pmanage" rel="Chapter" href="Netsys_pmanage.html">
<link title="Netgzip" rel="Chapter" href="Netgzip.html">
<link title="Netpop" rel="Chapter" href="Netpop.html">
<link title="Rpc_auth_dh" rel="Chapter" href="Rpc_auth_dh.html">
<link title="Rpc_key_service" rel="Chapter" href="Rpc_key_service.html">
<link title="Rpc_time" rel="Chapter" href="Rpc_time.html">
<link title="Rpc_auth_local" rel="Chapter" href="Rpc_auth_local.html">
<link title="Rpc" rel="Chapter" href="Rpc.html">
<link title="Rpc_program" rel="Chapter" href="Rpc_program.html">
<link title="Rpc_util" rel="Chapter" href="Rpc_util.html">
<link title="Rpc_portmapper_aux" rel="Chapter" href="Rpc_portmapper_aux.html">
<link title="Rpc_packer" rel="Chapter" href="Rpc_packer.html">
<link title="Rpc_transport" rel="Chapter" href="Rpc_transport.html">
<link title="Rpc_client" rel="Chapter" href="Rpc_client.html">
<link title="Rpc_simple_client" rel="Chapter" href="Rpc_simple_client.html">
<link title="Rpc_portmapper_clnt" rel="Chapter" href="Rpc_portmapper_clnt.html">
<link title="Rpc_portmapper" rel="Chapter" href="Rpc_portmapper.html">
<link title="Rpc_server" rel="Chapter" href="Rpc_server.html">
<link title="Rpc_auth_sys" rel="Chapter" href="Rpc_auth_sys.html">
<link title="Rpc_auth_gssapi" rel="Chapter" href="Rpc_auth_gssapi.html">
<link title="Rpc_proxy" rel="Chapter" href="Rpc_proxy.html">
<link title="Rpc_intro" rel="Chapter" href="Rpc_intro.html">
<link title="Rpc_mapping_ref" rel="Chapter" href="Rpc_mapping_ref.html">
<link title="Rpc_intro_gss" rel="Chapter" href="Rpc_intro_gss.html">
<link title="Rpc_ssl" rel="Chapter" href="Rpc_ssl.html">
<link title="Rpc_xti_client" rel="Chapter" href="Rpc_xti_client.html">
<link title="Shell_sys" rel="Chapter" href="Shell_sys.html">
<link title="Shell" rel="Chapter" href="Shell.html">
<link title="Shell_uq" rel="Chapter" href="Shell_uq.html">
<link title="Shell_fs" rel="Chapter" href="Shell_fs.html">
<link title="Shell_intro" rel="Chapter" href="Shell_intro.html">
<link title="Netsmtp" rel="Chapter" href="Netsmtp.html">
<link title="Intro" rel="Chapter" href="Intro.html">
<link title="Platform" rel="Chapter" href="Platform.html">
<link title="Foreword" rel="Chapter" href="Foreword.html">
<link title="Ipv6" rel="Chapter" href="Ipv6.html">
<link title="Regexp" rel="Chapter" href="Regexp.html"><title>Ocamlnet 3 Reference Manual : Nethtml</title>
</head>
<body>
<div class="navbar"><a class="pre" href="Netaccel_link.html" title="Netaccel_link">Previous</a>
&nbsp;<a class="up" href="index.html" title="Index">Up</a>
&nbsp;<a class="post" href="Netstring_str.html" title="Netstring_str">Next</a>
</div>
<h1>Module <a href="type_Nethtml.html">Nethtml</a></h1>
<pre><span class="keyword">module</span> Nethtml: <code class="code">sig</code> <a href="Nethtml.html">..</a> <code class="code">end</code></pre><div class="info">
Parsing of HTML<br>
</div>
<hr width="100%">
<pre><code><span id="TYPEdocument"><span class="keyword">type</span> <code class="type"></code>document</span> = </code></pre><table class="typetable">
<tr>
<td align="left" valign="top" >
<code><span class="keyword">|</span></code></td>
<td align="left" valign="top" >
<code><span id="TYPEELTdocument.Element"><span class="constructor">Element</span></span> <span class="keyword">of</span> <code class="type">(string * (string * string) list * <a href="Nethtml.html#TYPEdocument">document</a> list)</code></code></td>

</tr>
<tr>
<td align="left" valign="top" >
<code><span class="keyword">|</span></code></td>
<td align="left" valign="top" >
<code><span id="TYPEELTdocument.Data"><span class="constructor">Data</span></span> <span class="keyword">of</span> <code class="type">string</code></code></td>

</tr></table>

<div class="info">
The type <code class="code">document</code> represents parsed HTML documents:
<p>

 <ul>
<li><code class="code">Element (name, args, subnodes)</code> is an element node for an element of
   type <code class="code">name</code> (i.e. written <code class="code">&lt;name ...&gt;...&lt;/name&gt;</code>) with arguments <code class="code">args</code>
   and subnodes <code class="code">subnodes</code> (the material within the element). The arguments
   are simply name/value pairs. Entity references (something like <code class="code">&amp;xy;</code>)
   occurring in the values are <b>not</b> resolved.
<p>

   Arguments without values (e.g. <code class="code">&lt;select name="x" multiple&gt;</code>: here,
   <code class="code">multiple</code> is such an argument) are represented as <code class="code">(name,name)</code>, i.e. the
   name is also returned as value.
<p>

   As argument names are case-insensitive, the names are all lowercase.</li>
<li><code class="code">Data s</code> is a character data node. Again, entity references are contained
   as such and not as what they mean.</li>
</ul>

<p>

 Character encodings: The parser is restricted to ASCII-compatible
 encodings (see the function <a href="Netconversion.html#VALis_ascii_compatible"><code class="code">Netconversion.is_ascii_compatible</code></a> for
 a definition). In order to read other encodings, the text must be
 first recoded to an ASCII-compatible encoding (example below).
 Names of elements and attributes must additionally be ASCII-only.<br>
</div>

<br>
We also need a type that declares how to handle the various tags.
    This is called a "simplified DTD", as it is derived from SGML DTDs,
    but simplified to the extent used in the HTML definition.
<p>

    The HTML specification (http://www.w3.org/TR/1999/REC-html401-19991224)
    is the reference for the HTML DTD. You can see there that
    most HTML elements are either an inline element, a block element, or
    both ("flow" element). The grammar of HTML is described in terms of
    these classes. For instance, a <code class="code">P</code> tag (paragraph) is a block element and
    contains block elements whereas <code class="code">B</code> (bold) is an inline element and
    contains inline elements. From this follows that you cannot put a <code class="code">P</code>
    inside a <code class="code">B</code>: <code class="code">&lt;B&gt;&lt;P&gt;something&lt;/P&gt;&lt;/B&gt;</code> is illegal.
<p>

    The parser needs this information to resolve such input, i.e. do
    something with bad HTML. As HTML allows tag minimization (many end tags
    can be omitted), the parser can read this as: <code class="code">&lt;B&gt;&lt;/B&gt;&lt;P&gt;something&lt;/P&gt;</code>
    (and the <code class="code">&lt;/B&gt;</code> in the input is ignored).
<p>

    If all start and all end tags are written out, changing the
    simplified_dtd does not make any difference.
<p>

    There is no normative text that says how to read bad HTML. Because of
    this, what you put into <code class="code">simplified_dtd</code> is, to a large degree, an
    interpretation of HTML. We provide two versions:<ul>
<li><code class="code">html40_dtd</code>: tries to be close to the official spec</li>
<li><code class="code">relaxed_html40_dtd</code>: tries to be close to what common web browsers
      implement</li>
</ul>
<br>
<pre><span id="TYPEelement_class"><span class="keyword">type</span> <code class="type"></code>element_class</span> = <code class="type">[ `Block | `Essential_block | `Everywhere | `Inline | `None ]</code> </pre>
<div class="info">
Element classes are a property used in the HTML DTD. For our purposes,
 we define element classes simply as an enumeration:<ul>
<li><code class="code">`Inline</code> is the class of inline HTML elements</li>
<li><code class="code">`Block</code> is the class of block HTML elements</li>
<li><code class="code">`Essential_block</code> is a sub-class of <code class="code">`Block</code> with the additional
   property that every start tag must be explicitly ended</li>
<li><code class="code">`None</code> means that the members of the class are neither block nor
   inline elements, but have to be handled specially</li>
<li><code class="code">`Everywhere</code> means that the members of the class can occur everywhere, 
   regardless of whether a constraint allows it or not.</li>
</ul>
<br>
</div>

<pre><span id="TYPEmodel_constraint"><span class="keyword">type</span> <code class="type"></code>model_constraint</span> = <code class="type">[ `Any<br>       | `Block<br>       | `Elements of string list<br>       | `Empty<br>       | `Except of <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a> * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a><br>       | `Flow<br>       | `Inline<br>       | `Or of <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a> * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a><br>       | `Special<br>       | `Sub_exclusions of string list * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a> ]</code> </pre>
<div class="info">
Model constraints define the possible sub elements of an element:<ul>
<li><code class="code">`Inline</code>: The sub elements must belong to the class <code class="code">`Inline</code></li>
<li><code class="code">`Block</code>: The sub elements must be members of the classes <code class="code">`Block</code> or 
   <code class="code">`Essential_block</code></li>
<li><code class="code">`Flow</code>: The sub elements must belong to the classes <code class="code">`Inline</code>, <code class="code">`Block</code>,
   or <code class="code">`Essential_block</code></li>
<li><code class="code">`Empty</code>: There are no sub elements</li>
<li><code class="code">`Any</code>: Any sub element is allowed</li>
<li><code class="code">`Special</code>: The element has special content (e.g. <code class="code">&lt;script&gt;</code>).
   Functionally equivalent to <code class="code">`Empty</code></li>
<li><code class="code">`Elements l</code>: Only these enumerated elements may occur as sub elements</li>
<li><code class="code">`Or(m1,m2)</code>: One of the constraints <code class="code">m1</code> or <code class="code">m2</code> must hold</li>
<li><code class="code">`Except(m1,m2)</code>: The constraint <code class="code">m1</code> must hold, and <code class="code">m2</code> must not hold</li>
<li><code class="code">`Sub_exclusions(l,m)</code>: The constraint <code class="code">m</code> must hold; furthermore, 
   the elements enumerated in list <code class="code">l</code> are not allowed as direct or
   indirect subelements, even if <code class="code">m</code> or the model of a subelement would
   allow them. The difference to <code class="code">`Except(m, `Elements l)</code> is that the
   exclusion is inherited to the subelements. The <code class="code">`Sub_exclusions</code>
   expression must be toplevel, i.e. it must not occur within an <code class="code">`Or</code>, 
   <code class="code">`Except</code>, or another <code class="code">`Sub_exclusions</code> expression.</li>
</ul>

 Note that the members of the class <code class="code">`Everywhere</code> are allowed everywhere,
 regardless of whether the model constraint allows them or not.
<p>

 Note that certain aspects are not modeled:<ul>
<li><code class="code">#PCDATA</code>: We do not specify where PCDATA is allowed and where not.</li>
<li>Order, Number: We specify neither the order in which the sub elements must
   occur nor how often they can occur</li>
<li>Inclusions: DTDs may describe that an element extraordinarily
   allows a list of elements in all sub elements. </li>
<li>Optional tags: Whether start or end tags can be omitted (to some extent,
   this can be expressed with <code class="code">`Essential_block</code>, however)</li>
</ul>
<br>
</div>

<pre><span id="TYPEsimplified_dtd"><span class="keyword">type</span> <code class="type"></code>simplified_dtd</span> = <code class="type">(string * (<a href="Nethtml.html#TYPEelement_class">element_class</a> * <a href="Nethtml.html#TYPEmodel_constraint">model_constraint</a>)) list</code> </pre>
<div class="info">
A <code class="code">simplified_dtd</code> is an associative list of tuples
  <code class="code">(element_name, (element_class, constraint))</code>: For every <code class="code">element_name</code>
  it is declared that it is a member of <code class="code">element_class</code>, and that
  the sub elements must satisfy <code class="code">constraint</code>.
<p>

  It is not allowed to have several entries for the same element.<br>
</div>

<pre><span id="VALhtml40_dtd"><span class="keyword">val</span> html40_dtd</span> : <code class="type"><a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a></code></pre><div class="info">
The (transitional) HTML 4.0 DTD, expressed as <code class="code">simplified_dtd</code><br>
</div>
<pre><span id="VALrelaxed_html40_dtd"><span class="keyword">val</span> relaxed_html40_dtd</span> : <code class="type"><a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a></code></pre><div class="info">
A relaxed version of the HTML 4.0 DTD that better matches common
 practice. In particular, this DTD additionally allows that inline
 elements may span blocks. For example, 
 <pre class="codepre"><code class="code"> &lt;B&gt;text1 &lt;P&gt;text2 </code></pre>
 is parsed as
 <pre class="codepre"><code class="code"> &lt;B&gt;text1 &lt;P&gt;text2&lt;/P&gt;&lt;/B&gt; </code></pre>
 and not as
 <pre class="codepre"><code class="code"> &lt;B&gt;text1 &lt;/B&gt;&lt;P&gt;text2&lt;/P&gt; </code></pre>
 - the latter is more correct (and parsed by <code class="code">html40_dtd</code>), but is not what
 users expect.
<p>

 Note that this is still not what many browsers implement. For example,
 Netscape treats most inline tags specially: <code class="code">&lt;B&gt;</code> switches bold on,
 <code class="code">&lt;/B&gt;</code> switches bold off. For example,
 <pre class="codepre"><code class="code"> &lt;A href='a'&gt;text1&lt;B&gt;text2&lt;A href='b'&gt;text3 </code></pre>
 is parsed as
 <pre class="codepre"><code class="code"> &lt;A href='a'&gt;text1&lt;B&gt;text2&lt;/B&gt;&lt;/A&gt;&lt;B&gt;&lt;A href='b'&gt;text3&lt;/A&gt;&lt;/B&gt; </code></pre>
 - there is an extra <code class="code">B</code> element around the second anchor! (You can
 see what Netscape parses by loading a page into the "Composer".)
 IMHO it is questionable to consider inline tags as switches because
 this is totally outside of the HTML specification, and browsers may
 differ in that point.
<p>

 Furthermore, several elements are turned into essential blocks:
 <code class="code">TABLE</code>, <code class="code">UL</code>, <code class="code">OL</code>, and <code class="code">DL</code>. David Fox reported a problem with structures
 like:
 <pre class="codepre"><code class="code"> &lt;TABLE&gt;&lt;TR&gt;&lt;TD&gt;&lt;TABLE&gt;&lt;TR&gt;&lt;TD&gt;x&lt;/TD&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TABLE&gt;y&lt;/TD&gt;&lt;/TR&gt;&lt;/TABLE&gt; </code></pre>
 i.e. the <code class="code">TD</code> of the inner table has two end tags. Without additional
 help, the second <code class="code">&lt;/TD&gt;</code> would close the outer table cell. Because of
 this problem, tables are now essential blocks, meaning that it is not allowed
 to implicitly add a missing <code class="code">&lt;/TABLE&gt;</code>; every table element has to
 be explicitly ended. This rule seems to be what many browsers implement.<br>
</div>
<pre><span id="VALparse_document"><span class="keyword">val</span> parse_document</span> : <code class="type">?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -><br>       ?return_declarations:bool -><br>       ?return_pis:bool -><br>       ?return_comments:bool -> Lexing.lexbuf -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Parses the HTML document from a <code class="code">lexbuf</code> and returns it. 
<p>

<br>
</div>
<div class="param_info"><code class="code">dtd</code> : specifies the DTD to use. By default, <code class="code">html40_dtd</code> is used which
   is based on the transitional HTML 4.0 DTD</div>
<div class="param_info"><code class="code">return_declarations</code> : if set, the parser returns <code class="code">&lt;!...&gt;</code> declarations
   as <code class="code">Element("!",["contents",c],[])</code> nodes, where <code class="code">c</code> is the string inside
   <code class="code">&lt;!</code> and <code class="code">&gt;</code>. - By default, declarations are skipped.</div>
<div class="param_info"><code class="code">return_pis</code> : if set, the parser returns <code class="code">&lt;?...&gt;</code> (or <code class="code">&lt;?...?&gt;</code>) processing
   instructions as <code class="code">Element("?",["contents",c],[])</code> nodes, where <code class="code">c</code> is the
   string inside <code class="code">&lt;?</code> and <code class="code">&gt;</code> (or <code class="code">?&gt;</code>). - By default, processing instructions
   are skipped.</div>
<div class="param_info"><code class="code">return_comments</code> : if set, the parser returns <code class="code">&lt;!--</code> .... <code class="code">--&gt;</code> comments
   as <code class="code">Element("--",["contents",c],[])</code> nodes, where <code class="code">c</code> is the string inside
   <code class="code">&lt;!--</code> and <code class="code">--&gt;</code>. - By default, comments are skipped.</div>
<pre><span id="VALparse"><span class="keyword">val</span> parse</span> : <code class="type">?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -><br>       ?return_declarations:bool -><br>       ?return_pis:bool -><br>       ?return_comments:bool -> <a href="Netchannels.in_obj_channel-c.html">Netchannels.in_obj_channel</a> -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Parses the HTML document from an object channel and returns it.
 For example, to parse the HTML string <code class="code">s</code>:
 <pre class="codepre"><code class="code"> let ch = Netchannels.input_string s in
 let doc = parse ch
 </code></pre>
<p>

 Arguments are the same as in <code class="code">parse_document</code>.<br>
</div>
<br>
<b>Note on XHTML</b>
<p>

 The parser can read XHTML, as long as the following XML features are not
 used:<ul>
<li>Internal DTD subset, i.e. <code class="code">&lt;!DOCTYPE html ... [ ... ]&gt;</code></li>
<li>External entities</li>
<li><code class="code">&lt;![CDATA[</code></li>
<li><code class="code">&lt;![INCLUDE[</code></li>
<li><code class="code">&lt;![IGNORE[</code></li>
</ul>

 The following XML features are ok:<ul>
<li>Processing instructions</li>
<li>Empty elements (e.g. <code class="code">&lt;br/&gt;</code>) as long as the element is declared as 
   <code class="code">`Empty</code>.</li>
</ul>
<br>
<br>
<b>Note on Character Encodings</b>
<p>

 The parser can only read character streams that are encoded in an ASCII-
 compatible way. For example, it is possible to read a UTF-8-encoded
 stream, but not a UTF-16-encoded stream. All bytes between 1 and 127
 are taken as ASCII, and other bytes are not interpreted (they are copied
 from input to output).
<p>

 Non-ASCII-compatible streams must be recoded first. For example, to
 read a UTF-16-encoded netchannel <code class="code">ch</code>, use:
<p>

 <pre class="codepre"><code class="code"> let p = 
   new Netconversion.recoding_pipe ~in_enc:`Enc_utf16 ~out_enc:`Enc_utf8 () in
 let ch' =
   new Netchannels.input_filter ch p in
 let doc =
   Nethtml.parse ch' in
 ch' # close_in();
 ch # close_in();
 </code></pre><br>
<pre><span id="VALdecode"><span class="keyword">val</span> decode</span> : <code class="type">?enc:<a href="Netconversion.html#TYPEencoding">Netconversion.encoding</a> -><br>       ?subst:(int -> string) -><br>       ?entity_base:[ `Empty | `Html | `Xml ] -><br>       ?lookup:(string -> string) -><br>       ?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Converts entities <code class="code">&amp;name;</code> and <code class="code">&amp;#num;</code> into the corresponding 
 characters. The argument <code class="code">enc</code> must indicate the character set of
 the document (by default ISO-8859-1 for backwards compatibility).
 If a character cannot be represented in this encoding, the function
 <code class="code">subst</code> is called (input is the Unicode code point, output is the
 substituted string). By default, the function fails if such a 
 character is found.
<p>

 The arg <code class="code">entity_base</code> selects which entities can be converted
 (see <a href="Netencoding.Html.html#VALdecode"><code class="code">Netencoding.Html.decode</code></a>). The function <code class="code">lookup</code> is called
 for all unknown <code class="code">&amp;name;</code> entities. By default, this function fails.
<p>

 Declarations, processing instructions, and comments are not
 decoded. The same also applies to elements declared as <code class="code">`Special</code>
 in the DTD. The <code class="code">dtd</code> argument determines the DTD, by default
 <code class="code">html40_dtd</code> is assumed.<br>
</div>
<pre><span id="VALencode"><span class="keyword">val</span> encode</span> : <code class="type">?enc:<a href="Netconversion.html#TYPEencoding">Netconversion.encoding</a> -><br>       ?prefer_name:bool -><br>       ?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
Converts problematic characters to their corresponding
 entities. The argument <code class="code">enc</code> must indicate the character set of
 the document (by default ISO-8859-1 for backwards compatibility).
 If <code class="code">prefer_name</code> is <code class="code">true</code>, the algorithm tries to find the named entities
 (<code class="code">&amp;name;</code>); otherwise only numeric entities (<code class="code">&amp;#num;</code>) are generated.
 Names are preferred by default.
<p>

 Declarations, processing instructions, and comments are not
 encoded. The same also applies to elements declared as <code class="code">`Special</code>
 in the DTD. The <code class="code">dtd</code> argument determines the DTD, by default
 <code class="code">html40_dtd</code> is assumed.<br>
</div>
<pre><span id="VALmap_list"><span class="keyword">val</span> map_list</span> : <code class="type">(string -> string) -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
<code class="code">map_list f doclst</code>:
 Applies <code class="code">f</code> to all attribute values and data strings (except
 the attributes of "?", "!", or "--" nodes). 
<p>

 This can be used to transform the text of a parsed document, for example to change its text encoding. The following call:
 <pre class="codepre"><code class="code"> let doc' = map_list String.lowercase doc
 </code></pre>
 converts all attribute values and data strings to lowercase characters.<br>
</div>
<pre><code><span id="TYPExmap_value"><span class="keyword">type</span> <code class="type"></code>xmap_value</span> = </code></pre><table class="typetable">
<tr>
<td align="left" valign="top" >
<code><span class="keyword">|</span></code></td>
<td align="left" valign="top" >
<code><span id="TYPEELTxmap_value.Xmap_attribute"><span class="constructor">Xmap_attribute</span></span> <span class="keyword">of</span> <code class="type">string * string * string</code></code></td>

</tr>
<tr>
<td align="left" valign="top" >
<code><span class="keyword">|</span></code></td>
<td align="left" valign="top" >
<code><span id="TYPEELTxmap_value.Xmap_data"><span class="constructor">Xmap_data</span></span> <span class="keyword">of</span> <code class="type">string option * string</code></code></td>

</tr></table>


<pre><span id="VALxmap_list"><span class="keyword">val</span> xmap_list</span> : <code class="type">(<a href="Nethtml.html#TYPExmap_value">xmap_value</a> -> string) -><br>       string option -> <a href="Nethtml.html#TYPEdocument">document</a> list -> <a href="Nethtml.html#TYPEdocument">document</a> list</code></pre><div class="info">
<code class="code">xmap_list f surrounding_element_opt doclst</code>: Similar to <code class="code">map_list</code>,
 the function <code class="code">f</code> is applied to all attribute values and data strings.
 Unlike <code class="code">map_list</code>, more information is passed to the callback function
 <code class="code">f</code>. This function is called with an <code class="code">xmap_value</code> argument:<ul>
<li><code class="code">Xmap_attribute(ename,aname,aval)</code>: The function is called for an
   attribute value of element <code class="code">ename</code>. The attribute is <code class="code">aname</code> and
   has the value <code class="code">aval</code>. The function must return the new value of
   the attribute (i.e. <code class="code">aval'</code>).</li>
<li><code class="code">Xmap_data(ename_opt,data)</code>: The function is called for a data
   node surrounded by an element <code class="code">ename_opt</code> (which is <code class="code">None</code> if the
   data node is the outermost node). The string <code class="code">data</code> is the value
   of the data node. The function must return the new value of the
   data node (i.e. <code class="code">data'</code>).</li>
</ul>

 <code class="code">xmap_list</code> is invoked with <code class="code">surrounding_element_opt</code> which is the
 name of the surrounding element, or <code class="code">None</code> if such an element does 
 not exist, or is unknown.<br>
</div>
<pre><span id="VALwrite"><span class="keyword">val</span> write</span> : <code class="type">?dtd:<a href="Nethtml.html#TYPEsimplified_dtd">simplified_dtd</a> -><br>       ?xhtml:bool -> <a href="Netchannels.out_obj_channel-c.html">Netchannels.out_obj_channel</a> -> <a href="Nethtml.html#TYPEdocument">document</a> list -> unit</code></pre><div class="info">
Writes the document to the output channel. No additional encoding or
 decoding happens.
<p>

 Empty elements are written without an end tag (see also the optional argument
 <code class="code">xhtml</code>); the rest is written unabbreviated.
<p>

 Example: To write the document to a file:
 <pre class="codepre"><code class="code"> let f = open_out "filename" in
 let ch = new Netchannels.output_channel f in
 write ch doc;
 ch # close_out()
 </code></pre>
<p>

<br>
</div>
<div class="param_info"><code class="code">dtd</code> : The assumed simplified DTD, by default <code class="code">html40_dtd</code></div>
<div class="param_info"><code class="code">xhtml</code> : makes the output compatible with XHTML 1.0 Strict by
 closing <code class="code">`Empty</code> tags with "/&gt;" (<code class="code">true</code> by default).</div>
</body></html>