Sophie: haskell-HXT-7.1-2mdv2007.1 i586

haskell-HXT-7.1-2mdv2007.1.i586.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!--Rendered using the Haskell Html Library v0.2-->
<HTML
><HEAD
><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"
><TITLE
>Text.XML.HXT.Parser.MainFunctions</TITLE
><LINK HREF="haddock.css" REL="stylesheet" TYPE="text/css"
><SCRIPT SRC="haddock.js" TYPE="text/javascript"
></SCRIPT
></HEAD
><BODY
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD CLASS="topbar"
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD
><IMG SRC="haskell_icon.gif" WIDTH="16" HEIGHT="16" ALT=" "
></TD
><TD CLASS="title"
>hxt-7.1: </TD
><TD CLASS="topbut"
><A HREF="index.html"
>Contents</A
></TD
><TD CLASS="topbut"
><A HREF="doc-index.html"
>Index</A
></TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="modulebar"
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD
><FONT SIZE="6"
>Text.XML.HXT.Parser.MainFunctions</FONT
></TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
>Description</TD
></TR
><TR
><TD CLASS="doc"
><P
>Simple parse functions.
</P
><P
>Version : $Id: MainFunctions.hs,v 1.2 2004/11/20 16:53:15 hxml Exp $
</P
><P
>the main building blocks for an application.
 this module exports complex filters and functions for
 common tasks for input and parsing, output and option handling.
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
>Synopsis</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="body"
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD CLASS="decl"
><A HREF="#v%3AgetXmlDocument"
>getXmlDocument</A
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; String -&gt; IO (<A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTree"
>XmlTree</A
>, <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTrees"
>XmlTrees</A
>, Int)</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AputXmlDocument"
>putXmlDocument</A
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; String -&gt; <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTree"
>XmlTree</A
> -&gt; IO (<A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTrees"
>XmlTrees</A
>, Int)</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AparseDocument"
>parseDocument</A
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; <A HREF="Text-XML-HXT-DOM-XmlState.html#t%3AXmlStateFilter"
>XmlStateFilter</A
> state</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AwriteDocument"
>writeDocument</A
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; <A HREF="Text-XML-HXT-DOM-XmlState.html#t%3AXmlStateFilter"
>XmlStateFilter</A
> state</TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
>Documentation</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AgetXmlDocument"
></A
><B
>getXmlDocument</B
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; String -&gt; IO (<A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTree"
>XmlTree</A
>, <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTrees"
>XmlTrees</A
>, Int)</TD
></TR
><TR
><TD CLASS="doc"
><P
>convenient function for reading an XML document without
 dealing with state monads, error messages collection and other details
</P
><P
>getXmlDocument calls <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AparseDocument"
>parseDocument</A
></TT
> with the list of parsing options
 and an url or filename as document source.
</P
><P
>result is a triple
</P
><UL
><LI
> the resulting document tree with a root node containing all
   meta info about the document (options, status info, http header, ...)
</LI
><LI
> the list of errors and warnings
</LI
><LI
> the error level: one of <TT
><A HREF="Text-XML-HXT-DOM-TypeDefs.html#v%3Ac_ok"
>c_ok</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-TypeDefs.html#v%3Ac_warn"
>c_warn</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-TypeDefs.html#v%3Ac_err"
>c_err</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-TypeDefs.html#v%3Ac_fatal"
>c_fatal</A
></TT
>
</LI
></UL
><P
>example for input (see also example in <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AputXmlDocument"
>putXmlDocument</A
></TT
> and example in <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AwriteDocument"
>writeDocument</A
></TT
>)
</P
><PRE
> main :: IO ()
 main
   = do
     (res, errs, rc) &lt;- getXmlDocument [] &quot;test.xml&quot;
     if rc &gt;= c_err
       then issueErrors errs
       else processTree res

 issueErrors :: XmlTrees -&gt; IO ()

 processTree :: XmlTree  -&gt; IO ()
</PRE
><P
>for options see <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AparseDocument"
>parseDocument</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_collect_errors"
>a_collect_errors</A
></TT
> is set implicitly
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AputXmlDocument"
></A
><B
>putXmlDocument</B
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; String -&gt; <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTree"
>XmlTree</A
> -&gt; IO (<A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AXmlTrees"
>XmlTrees</A
>, Int)</TD
></TR
><TR
><TD CLASS="doc"
><P
>the inverse operation to <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AgetXmlDocument"
>getXmlDocument</A
></TT
>
</P
><P
>writes a complete document tree to a file, writing can be
 controlled by options, the real work is done with filter <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AwriteDocument"
>writeDocument</A
></TT
>.
 useful options are the options of <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AwriteDocument"
>writeDocument</A
></TT
>.
</P
><P
>result is a pair: the first part is a list of error messages, the second part is the return code,
 the status info of the write filter
</P
><P
>this filter is useful when processing XML in an arbitrary context in the IO monad
</P
><P
>an example main program for such an application is:
</P
><PRE
> main :: IO ()
 main
   = do
     (input, readErrs, rc) &lt;- getXmlDocument [...] &quot;test.xml&quot;
     if rc &gt;= c_err
       then issueErrors readErrs
       else processTree input

 processTree :: XmlTree -&gt; IO ()
 processTree t
   = let res = computeNewTree t
     in do
        (writeErrs, rc2) &lt;- putXmlDocument [...] &quot;out.xml&quot; res
        if rc2 &gt;= c_err
          then issueErrors writeErrs
          else return ()

 issueErrors :: XmlTrees -&gt; IO ()

 computeNewTree :: XmlTree -&gt; XmlTree
</PRE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AparseDocument"
></A
><B
>parseDocument</B
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; <A HREF="Text-XML-HXT-DOM-XmlState.html#t%3AXmlStateFilter"
>XmlStateFilter</A
> state</TD
></TR
><TR
><TD CLASS="doc"
><P
>the main parsing filter
</P
><P
>this filter can be configured by an option list, a list of
 option name, option value pairs.
 the input tree must be a possibly empty document root tree.
 all the options are stored as attributes in this root node to control processing.
</P
><P
>available options:
</P
><UL
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_parse_html"
>a_parse_html</A
></TT
>: use HTML parser, else use XML parser (default)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_validate"
>a_validate</A
></TT
> : validate document (default), else skip validation
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_check_namespaces"
>a_check_namespaces</A
></TT
> : check namespaces, else skip namespace processing (default)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_canonicalize"
>a_canonicalize</A
></TT
> : canonicalize document (default), else skip canonicalization
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_preserve_comment"
>a_preserve_comment</A
></TT
> : preserve comments during canonicalization, else remove comments (default)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_remove_whitespace"
>a_remove_whitespace</A
></TT
> : remove all whitespace, used for document indentation, else skip this step (default)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_indent"
>a_indent</A
></TT
> : indent document by inserting whitespace, else skip this step (default)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_issue_warnings"
>a_issue_warnings</A
></TT
> : issue warnings, when parsing HTML (default), else ignore HTML parser warnings
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_issue_errors"
>a_issue_errors</A
></TT
> : issue all error messages on stderr (default), or ignore all error messages
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_collect_errors"
>a_collect_errors</A
></TT
> : all error messages are collected during processing and appended to the result document
 			  for further processing within the calling modules
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_trace"
>a_trace</A
></TT
> : trace level: values: 0 - 4
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_proxy"
>a_proxy</A
></TT
> : proxy for http access, e.g. www-cache:3128
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_use_curl"
>a_use_curl</A
></TT
> : for http access via external program curl, default is native HTTP access
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_options_curl"
>a_options_curl</A
></TT
> : more options for external program curl
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_source"
>a_source</A
></TT
> : the document source url
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_encoding"
>a_encoding</A
></TT
> : default document encoding (<TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Autf8"
>utf8</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3AisoLatin1"
>isoLatin1</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3AusAscii"
>usAscii</A
></TT
>, ...)
</LI
></UL
><P
>examples:
</P
><PRE
> parseDocument [ (a_source,   &quot;test.xml&quot;)
               , (a_validate, &quot;0&quot;)
               , (a_encoding, isoLatin1)
               ] emptyRoot
</PRE
><P
>reads document &quot;test.xml&quot; without validation and default encoding <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3AisoLatin1"
>isoLatin1</A
></TT
>
</P
><PRE
> parseDocument [ (a_source,         &quot;http://www.haskell.org/&quot;)
               , (a_parse_html,     &quot;1&quot;)
               , (a_proxy,          &quot;www-cache:3128&quot;)
               , (a_use_curl,       &quot;1&quot;)
               , (a_issue_warnings, &quot;0&quot;)
               ] emptyRoot
</PRE
><P
>reads Haskell homepage with HTML parser ignoring any warnings and with http access via external program curl and proxy &quot;www-cache&quot; at port 3128
</P
><PRE
> parseDocument [ (a_source,            &quot;http://www.w3c.org/&quot;)
               , (a_parse_html,        &quot;0&quot;)                       -- default
               , (a_validate,          &quot;1&quot;)                       -- default
               , (a_check_namespaces,  &quot;1&quot;)
               , (a_remove_whitespace, &quot;1&quot;)
               , (a_trace,             &quot;2&quot;)
               ] emptyRoot
</PRE
><P
>read w3c home page, validate and check namespaces, remove whitespace between tags, trace activities with level 2
</P
><PRE
> parseDocument [ (a_source,           &quot;test.xml&quot;)
               , (a_validate,         &quot;1&quot;)
               , (a_check_namespaces, &quot;1&quot;)
               , (a_collect_errors,   &quot;1&quot;)
               , (a_issue_errors,     &quot;0&quot;)
               ] emptyRoot
</PRE
><P
>reads file &quot;test.xml&quot;, validates it, checks namespaces, does not issue any errors
 but collects errors and appends the list of errors to the single element list for the document.
 this enables the calling application to define own error handlers.
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AwriteDocument"
></A
><B
>writeDocument</B
> :: <A HREF="Text-XML-HXT-DOM-TypeDefs.html#t%3AAttributes"
>Attributes</A
> -&gt; <A HREF="Text-XML-HXT-DOM-XmlState.html#t%3AXmlStateFilter"
>XmlStateFilter</A
> state</TD
></TR
><TR
><TD CLASS="doc"
><P
>the main filter for writing documents
</P
><P
>this filter can be configured by an option list like <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AgetXmlDocument"
>getXmlDocument</A
></TT
>
</P
><P
>available options are
</P
><UL
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_indent"
>a_indent</A
></TT
> : indent document for readability, (default: no indentation)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_remove_whitespace"
>a_remove_whitespace</A
></TT
> : remove all redundant whitespace to shorten the text (default: no removal)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_output_file"
>a_output_file</A
></TT
> : destination file for document, default is &quot;-&quot; for stdout
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_output_encoding"
>a_output_encoding</A
></TT
> : encoding of document, default is <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_encoding"
>a_encoding</A
></TT
> or <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Autf8"
>utf8</A
></TT
>
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_output_xml"
>a_output_xml</A
></TT
> : (default) issue XML: quote special XML chars &gt;,&lt;,&quot;,',&amp;
                    add XML processing instruction
                    and encode document with respect to <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_output_encoding"
>a_output_encoding</A
></TT
>,
                    if explicitly switched off, the plain text is issued, this is useful
                    for non XML output, e.g. generated Haskell code, LaTeX, Java, ...
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_show_tree"
>a_show_tree</A
></TT
> : show tree representation of document (for debugging)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_show_haskell"
>a_show_haskell</A
></TT
> : show Haskell representation of document (for debugging)
</LI
><LI
> <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_issue_errors"
>a_issue_errors</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-XmlKeywords.html#v%3Aa_collect_errors"
>a_collect_errors</A
></TT
> : see <TT
><A HREF="Text-XML-HXT-Parser-MainFunctions.html#v%3AparseDocument"
>parseDocument</A
></TT
>
</LI
></UL
><P
>a typical main program running in the XmlState monad
  has the following structure:
</P
><PRE
>
 main :: IO ()
 main
     = do
       argv &lt;- getArgs                                              -- get the commandline arguments
       (inp, outp, options) &lt;- cmdlineOpts argv                     -- and evaluate them, return a key-value list
                                                                    -- and input and output
       res  &lt;- run' $ application inp outp options $ emptyRoot      -- run the application
 
       exitWith (if null res
                 then ExitFailure (-1)
                 else ExitSuccess
                )

 application :: String -&gt; String -&gt; Attributes -&gt; XmlStateFilter ()
 application inp outp al
   = parseDocument (al ++ [(a_source, inp)])                        -- set options and source
     .&gt;&gt;                                                            -- and parse document
     processDocument                                                -- the hard work
     .&gt;&gt;
     writeDocument [(a_output_file, outp)]                          -- issue results
     .&gt;&gt;
     checkStatus                                                    -- check errors

</PRE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="botbar"
>Produced by <A HREF="http://www.haskell.org/haddock/"
>Haddock</A
> version 0.8</TD
></TR
></TABLE
></BODY
></HTML
>