Sophie

Sophie

distrib > Mandriva > 2007.1 > i586 > by-pkgid > 09cecd41fd5510f1b4c6358078b3faaf > files > 184

haskell-HXT-7.1-2mdv2007.1.i586.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!--Rendered using the Haskell Html Library v0.2-->
<HTML
><HEAD
><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"
><TITLE
>Text.XML.HXT.DOM.Unicode</TITLE
><LINK HREF="haddock.css" REL="stylesheet" TYPE="text/css"
><SCRIPT SRC="haddock.js" TYPE="text/javascript"
></SCRIPT
></HEAD
><BODY
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD CLASS="topbar"
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD
><IMG SRC="haskell_icon.gif" WIDTH="16" HEIGHT="16" ALT=" "
></TD
><TD CLASS="title"
>hxt-7.1: </TD
><TD CLASS="topbut"
><A HREF="index.html"
>Contents</A
></TD
><TD CLASS="topbut"
><A HREF="doc-index.html"
>Index</A
></TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="modulebar"
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD
><FONT SIZE="6"
>Text.XML.HXT.DOM.Unicode</FONT
></TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD CLASS="section4"
><B
>Contents</B
></TD
></TR
><TR
><TD
><DL
><DT
><A HREF="#1"
>Unicode Type declarations
</A
></DT
><DT
><A HREF="#2"
>Unicode and UTF-8 predicates
</A
></DT
><DT
><A HREF="#3"
>UTF-8 and Unicode conversion functions
</A
></DT
></DL
></TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
>Description</TD
></TR
><TR
><TD CLASS="doc"
>Unicode (UCS-2) and UTF-8 Conversion Functions
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
>Synopsis</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="body"
><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0"
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A HREF="#t%3AUnicode"
>Unicode</A
> = Char</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A HREF="#t%3AUString"
>UString</A
> = [<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
>]</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A HREF="#t%3AUTF8Char"
>UTF8Char</A
> = Char</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A HREF="#t%3AUTF8String"
>UTF8String</A
> = String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisLeadingMultiByteChar"
>isLeadingMultiByteChar</A
> :: Char -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisFollowingMultiByteChar"
>isFollowingMultiByteChar</A
> :: Char -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisMultiByteChar"
>isMultiByteChar</A
> :: Char -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisNByteChar"
>isNByteChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; (Int, Int, Int)</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Ais1ByteXmlChar"
>is1ByteXmlChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisMultiByteXmlChar"
>isMultiByteXmlChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlChar"
>isXmlChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlLatin1Char"
>isXmlLatin1Char</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlSpaceChar"
>isXmlSpaceChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXml11SpaceChar"
>isXml11SpaceChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlNameChar"
>isXmlNameChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlNameStartChar"
>isXmlNameStartChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlNCNameChar"
>isXmlNCNameChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlNCNameStartChar"
>isXmlNCNameStartChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlPubidChar"
>isXmlPubidChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlLetter"
>isXmlLetter</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlBaseChar"
>isXmlBaseChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlIdeographicChar"
>isXmlIdeographicChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlCombiningChar"
>isXmlCombiningChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlDigit"
>isXmlDigit</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlExtender"
>isXmlExtender</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AisXmlControlOrPermanentlyUndefined"
>isXmlControlOrPermanentlyUndefined</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Autf8ToUnicodeChar"
>utf8ToUnicodeChar</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Autf8ToUnicode"
>utf8ToUnicode</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Autf8WithByteMarkToUnicode"
>utf8WithByteMarkToUnicode</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Alatin1ToUnicode"
>latin1ToUnicode</A
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Aucs2ToUnicode"
>ucs2ToUnicode</A
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Aucs2BigEndianToUnicode"
>ucs2BigEndianToUnicode</A
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Aucs2LittleEndianToUnicode"
>ucs2LittleEndianToUnicode</A
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Autf16beToUnicode"
>utf16beToUnicode</A
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3Autf16leToUnicode"
>utf16leToUnicode</A
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AunicodeCharToUtf8"
>unicodeCharToUtf8</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AunicodeToUtf8"
>unicodeToUtf8</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
></TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AunicodeToXmlEntity"
>unicodeToXmlEntity</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AunicodeToLatin1"
>unicodeToLatin1</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AunicodeRemoveNoneAscii"
>unicodeRemoveNoneAscii</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AunicodeRemoveNoneLatin1"
>unicodeRemoveNoneLatin1</A
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AintToCharRef"
>intToCharRef</A
> :: Int -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AintToCharRefHex"
>intToCharRefHex</A
> :: Int -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AgetEncodingFct"
>getEncodingFct</A
> :: String -&gt; Maybe (<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String)</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AgetOutputEncodingFct"
>getOutputEncodingFct</A
> :: String -&gt; Maybe (String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
>)</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AnormalizeNL"
>normalizeNL</A
> :: String -&gt; String</TD
></TR
><TR
><TD CLASS="s8"
></TD
></TR
><TR
><TD CLASS="decl"
><A HREF="#v%3AguessEncoding"
>guessEncoding</A
> :: String -&gt; String</TD
></TR
></TABLE
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
><A NAME="1"
>Unicode Type declarations
</A
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A NAME="t%3AUnicode"
></A
><B
>Unicode</B
> = Char</TD
></TR
><TR
><TD CLASS="doc"
>Unicode is represented as the Char type.
   Precondition for this is the support of the Unicode character range
   in the compiler (e.g. ghc but not hugs)
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A NAME="t%3AUString"
></A
><B
>UString</B
> = [<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
>]</TD
></TR
><TR
><TD CLASS="doc"
>the type for Unicode strings
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A NAME="t%3AUTF8Char"
></A
><B
>UTF8Char</B
> = Char</TD
></TR
><TR
><TD CLASS="doc"
>UTF-8 characters are represented by the Char type
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><SPAN CLASS="keyword"
>type</SPAN
> <A NAME="t%3AUTF8String"
></A
><B
>UTF8String</B
> = String</TD
></TR
><TR
><TD CLASS="doc"
>UTF-8 strings are implemented as Haskell strings
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
><A NAME="2"
>Unicode and UTF-8 predicates
</A
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisLeadingMultiByteChar"
></A
><B
>isLeadingMultiByteChar</B
> :: Char -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>test for leading multibyte UTF-8 character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisFollowingMultiByteChar"
></A
><B
>isFollowingMultiByteChar</B
> :: Char -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>test for following multibyte UTF-8 character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisMultiByteChar"
></A
><B
>isMultiByteChar</B
> :: Char -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>test for following multibyte UTF-8 character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisNByteChar"
></A
><B
>isNByteChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; (Int, Int, Int)</TD
></TR
><TR
><TD CLASS="doc"
>compute the number of following bytes and the mask bits of a leading UTF-8 multibyte char
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Ais1ByteXmlChar"
></A
><B
>is1ByteXmlChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>test for a legal 1 byte XML char
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisMultiByteXmlChar"
></A
><B
>isMultiByteXmlChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>test for a legal multi byte XML char
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlChar"
></A
><B
>isXmlChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for valid XML characters
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlLatin1Char"
></A
><B
>isXmlLatin1Char</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>test for a legal latin1 XML char
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlSpaceChar"
></A
><B
>isXmlSpaceChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML space character: \n, \r, \t and &quot; &quot;
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXml11SpaceChar"
></A
><B
>isXml11SpaceChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
><P
>checking for XML1.1 space character: additional space 0x85 and 0x2028
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlSpaceChar"
>isXmlSpaceChar</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlNameChar"
></A
><B
>isXmlNameChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML name character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlNameStartChar"
></A
><B
>isXmlNameStartChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
><P
>checking for XML name start character
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNameChar"
>isXmlNameChar</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlNCNameChar"
></A
><B
>isXmlNCNameChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
><P
>checking for XML NCName character: no &quot;:&quot; allowed
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNameChar"
>isXmlNameChar</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlNCNameStartChar"
></A
><B
>isXmlNCNameStartChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
><P
>checking for XML NCName start character: no &quot;:&quot; allowed
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNameChar"
>isXmlNameChar</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNCNameChar"
>isXmlNCNameChar</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlPubidChar"
></A
><B
>isXmlPubidChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML public id character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlLetter"
></A
><B
>isXmlLetter</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML letter
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlBaseChar"
></A
><B
>isXmlBaseChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML base character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlIdeographicChar"
></A
><B
>isXmlIdeographicChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML ideographic character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlCombiningChar"
></A
><B
>isXmlCombiningChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML combining character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlDigit"
></A
><B
>isXmlDigit</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML digit
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlExtender"
></A
><B
>isXmlExtender</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
>checking for XML extender
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AisXmlControlOrPermanentlyUndefined"
></A
><B
>isXmlControlOrPermanentlyUndefined</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; Bool</TD
></TR
><TR
><TD CLASS="doc"
><P
>checking for XML control or permanently undefined char (both are discouraged in XML documents)
</P
><P
>see Errata to XML1.0 (http://www.w3.org/XML/xml-V10-2e-errata) No 46
</P
><P
>Document authors are encouraged to avoid &quot;compatibility characters&quot;,
 as defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of [Unicode3]).
 The characters defined in the following ranges are also discouraged.
 They are either control characters or permanently undefined Unicode characters:
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="section1"
><A NAME="3"
>UTF-8 and Unicode conversion functions
</A
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Autf8ToUnicodeChar"
></A
><B
>utf8ToUnicodeChar</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
></TD
></TR
><TR
><TD CLASS="doc"
>conversion of a UTF-8 encoded single Unicode character into the corresponding Unicode value.
 precondition: the character is a valid UTF-8 encoded character
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Autf8ToUnicode"
></A
><B
>utf8ToUnicode</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>conversion of a UTF-8 encoded string into a sequence of unicode values.
 precondition: the string is a valid UTF-8 encoded string
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Autf8WithByteMarkToUnicode"
></A
><B
>utf8WithByteMarkToUnicode</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>UTF-8 to Unicode conversion with deletion of leading byte order mark, as described in XML standard F.1
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Alatin1ToUnicode"
></A
><B
>latin1ToUnicode</B
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>code conversion from latin1 to Unicode
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Aucs2ToUnicode"
></A
><B
>ucs2ToUnicode</B
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>UCS-2 to Unicode conversion with byte order mark analysis
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Aucs2BigEndianToUnicode"
></A
><B
>ucs2BigEndianToUnicode</B
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>UCS-2 big endian to Unicode conversion
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Aucs2LittleEndianToUnicode"
></A
><B
>ucs2LittleEndianToUnicode</B
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>UCS-2 little endian to Unicode conversion
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Autf16beToUnicode"
></A
><B
>utf16beToUnicode</B
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>UTF-16 big endian to Unicode conversion with removal of byte order mark
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3Autf16leToUnicode"
></A
><B
>utf16leToUnicode</B
> :: String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
></TD
></TR
><TR
><TD CLASS="doc"
>UTF-16 little endian to Unicode conversion with removal of byte order mark
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AunicodeCharToUtf8"
></A
><B
>unicodeCharToUtf8</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode"
>Unicode</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
></TD
></TR
><TR
><TD CLASS="doc"
>conversion from Unicode (Char) to a UTF8 encoded string.
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AunicodeToUtf8"
></A
><B
>unicodeToUtf8</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String"
>UTF8String</A
></TD
></TR
><TR
><TD CLASS="doc"
>conversion from Unicode strings (UString) to UTF8 encoded strings.
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AunicodeToXmlEntity"
></A
><B
>unicodeToXmlEntity</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>substitute all Unicode characters, that are not legal 1-byte
 UTF-8 XML characters by a character reference.
</P
><P
>This function can be used to translate all text nodes and
 attribute values into pure ascii.
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToLatin1"
>unicodeToLatin1</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AunicodeToLatin1"
></A
><B
>unicodeToLatin1</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>substitute all Unicode characters that are not legal latin1
 XML characters by a character reference.
</P
><P
>This function can be used to translate all text nodes and
 attribute values into ISO latin1.
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToXmlEntity"
>unicodeToXmlEntity</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AunicodeRemoveNoneAscii"
></A
><B
>unicodeRemoveNoneAscii</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>removes all non ascii chars, may be used to transform
 a document into a pure ascii representation by removing
 all non ascii chars from tag and attribute names
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeRemoveNoneLatin1"
>unicodeRemoveNoneLatin1</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToXmlEntity"
>unicodeToXmlEntity</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AunicodeRemoveNoneLatin1"
></A
><B
>unicodeRemoveNoneLatin1</B
> :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>removes all non latin1 chars, may be used to transform
 a document into a pure latin1 representation by removing
 all non latin1 chars from tag and attribute names
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeRemoveNoneAscii"
>unicodeRemoveNoneAscii</A
></TT
>, <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToLatin1"
>unicodeToLatin1</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AintToCharRef"
></A
><B
>intToCharRef</B
> :: Int -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>convert a Unicode value into an XML character reference.
</P
><P
>see also : <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AintToCharRefHex"
>intToCharRefHex</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AintToCharRefHex"
></A
><B
>intToCharRefHex</B
> :: Int -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>convert a Unicode value into an XML hexadecimal character reference.
</P
><P
>see also: <TT
><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AintToCharRef"
>intToCharRef</A
></TT
>
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AgetEncodingFct"
></A
><B
>getEncodingFct</B
> :: String -&gt; Maybe (<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
> -&gt; String)</TD
></TR
><TR
><TD CLASS="doc"
>the lookup function for selecting the encoding function
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AgetOutputEncodingFct"
></A
><B
>getOutputEncodingFct</B
> :: String -&gt; Maybe (String -&gt; <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString"
>UString</A
>)</TD
></TR
><TR
><TD CLASS="doc"
>the lookup function for selecting the encoding function
</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AnormalizeNL"
></A
><B
>normalizeNL</B
> :: String -&gt; String</TD
></TR
><TR
><TD CLASS="doc"
><P
>White Space (XML Standard 2.3) and 
 end of line handling (2.11)
</P
><P
>#x0D and #x0D#x0A are mapped to #x0A
</P
></TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="decl"
><A NAME="v%3AguessEncoding"
></A
><B
>guessEncoding</B
> :: String -&gt; String</TD
></TR
><TR
><TD CLASS="s15"
></TD
></TR
><TR
><TD CLASS="botbar"
>Produced by <A HREF="http://www.haskell.org/haddock/"
>Haddock</A
> version 0.8</TD
></TR
></TABLE
></BODY
></HTML
>