<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <!--Rendered using the Haskell Html Library v0.2--> <HTML ><HEAD ><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8" ><TITLE >Text.XML.HXT.DOM.Unicode</TITLE ><LINK HREF="haddock.css" REL="stylesheet" TYPE="text/css" ><SCRIPT SRC="haddock.js" TYPE="text/javascript" ></SCRIPT ></HEAD ><BODY ><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0" ><TR ><TD CLASS="topbar" ><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0" ><TR ><TD ><IMG SRC="haskell_icon.gif" WIDTH="16" HEIGHT="16" ALT=" " ></TD ><TD CLASS="title" >hxt-7.1: </TD ><TD CLASS="topbut" ><A HREF="index.html" >Contents</A ></TD ><TD CLASS="topbut" ><A HREF="doc-index.html" >Index</A ></TD ></TR ></TABLE ></TD ></TR ><TR ><TD CLASS="modulebar" ><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0" ><TR ><TD ><FONT SIZE="6" >Text.XML.HXT.DOM.Unicode</FONT ></TD ></TR ></TABLE ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD ><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0" ><TR ><TD CLASS="section4" ><B >Contents</B ></TD ></TR ><TR ><TD ><DL ><DT ><A HREF="#1" >Unicode Type declarations </A ></DT ><DT ><A HREF="#2" >Unicode and UTF-8 predicates </A ></DT ><DT ><A HREF="#3" >UTF-8 and Unicode conversion functions </A ></DT ></DL ></TD ></TR ></TABLE ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="section1" >Description</TD ></TR ><TR ><TD CLASS="doc" >Unicode (UCS-2) and UTF-8 Conversion Functions </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="section1" >Synopsis</TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="body" ><TABLE CLASS="vanilla" CELLSPACING="0" CELLPADDING="0" ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A HREF="#t%3AUnicode" >Unicode</A > = Char</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A HREF="#t%3AUString" >UString</A > = [<A 
HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A >]</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A HREF="#t%3AUTF8Char" >UTF8Char</A > = Char</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A HREF="#t%3AUTF8String" >UTF8String</A > = String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisLeadingMultiByteChar" >isLeadingMultiByteChar</A > :: Char -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisFollowingMultiByteChar" >isFollowingMultiByteChar</A > :: Char -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisMultiByteChar" >isMultiByteChar</A > :: Char -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisNByteChar" >isNByteChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> (Int, Int, Int)</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Ais1ByteXmlChar" >is1ByteXmlChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisMultiByteXmlChar" >isMultiByteXmlChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlChar" >isXmlChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlLatin1Char" >isXmlLatin1Char</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlSpaceChar" >isXmlSpaceChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR 
><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXml11SpaceChar" >isXml11SpaceChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlNameChar" >isXmlNameChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlNameStartChar" >isXmlNameStartChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlNCNameChar" >isXmlNCNameChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlNCNameStartChar" >isXmlNCNameStartChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlPubidChar" >isXmlPubidChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlLetter" >isXmlLetter</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlBaseChar" >isXmlBaseChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlIdeographicChar" >isXmlIdeographicChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlCombiningChar" >isXmlCombiningChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD 
CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlDigit" >isXmlDigit</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlExtender" >isXmlExtender</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AisXmlControlOrPermanentlyUndefined" >isXmlControlOrPermanentlyUndefined</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Autf8ToUnicodeChar" >utf8ToUnicodeChar</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Autf8ToUnicode" >utf8ToUnicode</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Autf8WithByteMarkToUnicode" >utf8WithByteMarkToUnicode</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Alatin1ToUnicode" >latin1ToUnicode</A > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Aucs2ToUnicode" >ucs2ToUnicode</A > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Aucs2BigEndianToUnicode" >ucs2BigEndianToUnicode</A > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" 
>UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Aucs2LittleEndianToUnicode" >ucs2LittleEndianToUnicode</A > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Autf16beToUnicode" >utf16beToUnicode</A > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3Autf16leToUnicode" >utf16leToUnicode</A > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AunicodeCharToUtf8" >unicodeCharToUtf8</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AunicodeToUtf8" >unicodeToUtf8</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A ></TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AunicodeToXmlEntity" >unicodeToXmlEntity</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AunicodeToLatin1" >unicodeToLatin1</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AunicodeRemoveNoneAscii" >unicodeRemoveNoneAscii</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AunicodeRemoveNoneLatin1" >unicodeRemoveNoneLatin1</A > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD 
></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AintToCharRef" >intToCharRef</A > :: Int -> String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AintToCharRefHex" >intToCharRefHex</A > :: Int -> String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AgetEncodingFct" >getEncodingFct</A > :: String -> Maybe (<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String)</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AgetOutputEncodingFct" >getOutputEncodingFct</A > :: String -> Maybe (String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A >)</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AnormalizeNL" >normalizeNL</A > :: String -> String</TD ></TR ><TR ><TD CLASS="s8" ></TD ></TR ><TR ><TD CLASS="decl" ><A HREF="#v%3AguessEncoding" >guessEncoding</A > :: String -> String</TD ></TR ></TABLE ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="section1" ><A NAME="1" >Unicode Type declarations </A ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A NAME="t%3AUnicode" ></A ><B >Unicode</B > = Char</TD ></TR ><TR ><TD CLASS="doc" >Unicode is represented as the Char type. Precondition for this is the support of Unicode character range in the compiler (e.g. 
ghc but not hugs) </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A NAME="t%3AUString" ></A ><B >UString</B > = [<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A >]</TD ></TR ><TR ><TD CLASS="doc" >the type for Unicode strings </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A NAME="t%3AUTF8Char" ></A ><B >UTF8Char</B > = Char</TD ></TR ><TR ><TD CLASS="doc" >UTF-8 characters are represented by the Char type </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><SPAN CLASS="keyword" >type</SPAN > <A NAME="t%3AUTF8String" ></A ><B >UTF8String</B > = String</TD ></TR ><TR ><TD CLASS="doc" >UTF-8 strings are implemented as Haskell strings </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="section1" ><A NAME="2" >Unicode and UTF-8 predicates </A ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisLeadingMultiByteChar" ></A ><B >isLeadingMultiByteChar</B > :: Char -> Bool</TD ></TR ><TR ><TD CLASS="doc" >test for leading multibyte UTF-8 character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisFollowingMultiByteChar" ></A ><B >isFollowingMultiByteChar</B > :: Char -> Bool</TD ></TR ><TR ><TD CLASS="doc" >test for following multibyte UTF-8 character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisMultiByteChar" ></A ><B >isMultiByteChar</B > :: Char -> Bool</TD ></TR ><TR ><TD CLASS="doc" >test for following multibyte UTF-8 character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisNByteChar" ></A ><B >isNByteChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> (Int, Int, Int)</TD ></TR ><TR ><TD CLASS="doc" >compute the number of following bytes and the mask bits of a leading UTF-8 multibyte char </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD 
CLASS="decl" ><A NAME="v%3Ais1ByteXmlChar" ></A ><B >is1ByteXmlChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >test for a legal 1 byte XML char </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisMultiByteXmlChar" ></A ><B >isMultiByteXmlChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >test for a legal multi byte XML char </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlChar" ></A ><B >isXmlChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for valid XML characters </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlLatin1Char" ></A ><B >isXmlLatin1Char</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >test for a legal latin1 XML char </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlSpaceChar" ></A ><B >isXmlSpaceChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML space character: \n, \r, \t and " " </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXml11SpaceChar" ></A ><B >isXml11SpaceChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" ><P >checking for XML1.1 space character: additional space 0x85 and 0x2028 </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlSpaceChar" >isXmlSpaceChar</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlNameChar" ></A ><B >isXmlNameChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" 
>checking for XML name character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlNameStartChar" ></A ><B >isXmlNameStartChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" ><P >checking for XML name start character </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNameChar" >isXmlNameChar</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlNCNameChar" ></A ><B >isXmlNCNameChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" ><P >checking for XML NCName character: no ":" allowed </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNameChar" >isXmlNameChar</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlNCNameStartChar" ></A ><B >isXmlNCNameStartChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" ><P >checking for XML NCName start character: no ":" allowed </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNameChar" >isXmlNameChar</A ></TT >, <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AisXmlNCNameChar" >isXmlNCNameChar</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlPubidChar" ></A ><B >isXmlPubidChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML public id character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlLetter" ></A ><B >isXmlLetter</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML letter </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlBaseChar" ></A ><B 
>isXmlBaseChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML base character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlIdeographicChar" ></A ><B >isXmlIdeographicChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML ideographic character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlCombiningChar" ></A ><B >isXmlCombiningChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML combining character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlDigit" ></A ><B >isXmlDigit</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML digit </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlExtender" ></A ><B >isXmlExtender</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" >checking for XML extender </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AisXmlControlOrPermanentlyUndefined" ></A ><B >isXmlControlOrPermanentlyUndefined</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> Bool</TD ></TR ><TR ><TD CLASS="doc" ><P >checking for XML control or permanently discouraged char </P ><P >see Errata to XML1.0 (http://www.w3.org/XML/xml-V10-2e-errata) No 46 </P ><P >Document authors are encouraged to avoid "compatibility characters", as defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of [Unicode3]). The characters defined in the following ranges are also discouraged. 
They are either control characters or permanently undefined Unicode characters: </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="section1" ><A NAME="3" >UTF-8 and Unicode conversion functions </A ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Autf8ToUnicodeChar" ></A ><B >utf8ToUnicodeChar</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A ></TD ></TR ><TR ><TD CLASS="doc" >conversion of a UTF-8 encoded single Unicode character into the corresponding Unicode value. precondition: the character is a valid UTF-8 encoded character </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Autf8ToUnicode" ></A ><B >utf8ToUnicode</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >conversion of a UTF-8 encoded string into a sequence of unicode values. 
precondition: the string is a valid UTF-8 encoded string </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Autf8WithByteMarkToUnicode" ></A ><B >utf8WithByteMarkToUnicode</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >UTF-8 to Unicode conversion with deletion of leading byte order mark, as described in XML standard F.1 </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Alatin1ToUnicode" ></A ><B >latin1ToUnicode</B > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >code conversion from latin1 to Unicode </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Aucs2ToUnicode" ></A ><B >ucs2ToUnicode</B > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >UCS-2 to Unicode conversion with byte order mark analysis </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Aucs2BigEndianToUnicode" ></A ><B >ucs2BigEndianToUnicode</B > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >UCS-2 big endian to Unicode conversion </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Aucs2LittleEndianToUnicode" ></A ><B >ucs2LittleEndianToUnicode</B > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >UCS-2 little endian to Unicode conversion </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Autf16beToUnicode" ></A ><B >utf16beToUnicode</B > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >UTF-16 big endian to Unicode conversion with removal of byte order mark </TD ></TR ><TR ><TD 
CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3Autf16leToUnicode" ></A ><B >utf16leToUnicode</B > :: String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A ></TD ></TR ><TR ><TD CLASS="doc" >UTF-16 little endian to Unicode conversion with removal of byte order mark </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AunicodeCharToUtf8" ></A ><B >unicodeCharToUtf8</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUnicode" >Unicode</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A ></TD ></TR ><TR ><TD CLASS="doc" >conversion from Unicode (Char) to a UTF8 encoded string. </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AunicodeToUtf8" ></A ><B >unicodeToUtf8</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUTF8String" >UTF8String</A ></TD ></TR ><TR ><TD CLASS="doc" >conversion from Unicode strings (UString) to UTF8 encoded strings. </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AunicodeToXmlEntity" ></A ><B >unicodeToXmlEntity</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >substitute all Unicode characters, that are not legal 1-byte UTF-8 XML characters by a character reference. </P ><P >This function can be used to translate all text nodes and attribute values into pure ascii. </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToLatin1" >unicodeToLatin1</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AunicodeToLatin1" ></A ><B >unicodeToLatin1</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >substitute all Unicode characters, that are not legal latin1 UTF-8 XML characters by a character reference. 
</P ><P >This function can be used to translate all text nodes and attribute values into ISO latin1. </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToXmlEntity" >unicodeToXmlEntity</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AunicodeRemoveNoneAscii" ></A ><B >unicodeRemoveNoneAscii</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >removes all non ascii chars, may be used to transform a document into a pure ascii representation by removing all non ascii chars from tag and attribute names </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeRemoveNoneLatin1" >unicodeRemoveNoneLatin1</A ></TT >, <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToXmlEntity" >unicodeToXmlEntity</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AunicodeRemoveNoneLatin1" ></A ><B >unicodeRemoveNoneLatin1</B > :: <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >removes all non latin1 chars, may be used to transform a document into a pure latin1 representation by removing all non latin1 chars from tag and attribute names </P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeRemoveNoneAscii" >unicodeRemoveNoneAscii</A ></TT >, <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AunicodeToLatin1" >unicodeToLatin1</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AintToCharRef" ></A ><B >intToCharRef</B > :: Int -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >convert a Unicode into an XML character reference. 
</P ><P >see also : <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AintToCharRefHex" >intToCharRefHex</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AintToCharRefHex" ></A ><B >intToCharRefHex</B > :: Int -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >convert a Unicode into an XML hexadecimal character reference. </P ><P >see also: <TT ><A HREF="Text-XML-HXT-DOM-Unicode.html#v%3AintToCharRef" >intToCharRef</A ></TT > </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AgetEncodingFct" ></A ><B >getEncodingFct</B > :: String -> Maybe (<A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A > -> String)</TD ></TR ><TR ><TD CLASS="doc" >the lookup function for selecting the encoding function </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AgetOutputEncodingFct" ></A ><B >getOutputEncodingFct</B > :: String -> Maybe (String -> <A HREF="Text-XML-HXT-DOM-Unicode.html#t%3AUString" >UString</A >)</TD ></TR ><TR ><TD CLASS="doc" >the lookup function for selecting the encoding function </TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AnormalizeNL" ></A ><B >normalizeNL</B > :: String -> String</TD ></TR ><TR ><TD CLASS="doc" ><P >White Space (XML Standard 2.3) and end of line handling (2.11) </P ><P >#x0D and #x0D#x0A are mapped to #x0A </P ></TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="decl" ><A NAME="v%3AguessEncoding" ></A ><B >guessEncoding</B > :: String -> String</TD ></TR ><TR ><TD CLASS="s15" ></TD ></TR ><TR ><TD CLASS="botbar" >Produced by <A HREF="http://www.haskell.org/haddock/" >Haddock</A > version 0.8</TD ></TR ></TABLE ></BODY ></HTML >