Sophie

Sophie

distrib > Fedora > 14 > x86_64 > media > updates > by-pkgid > c97e5283ec2813fd0eb55be8bea536e2 > files > 38

ghc-pcre-light-devel-0.4-4.fc14.x86_64.rpm

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<!-- Declare the encoding for HTML parsers: they ignore the XML prolog, so without this the charset is left to guessing. -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<!-- Generated by HsColour, http://www.cs.york.ac.uk/fp/darcs/hscolour/ -->
<title>Text/Regex/PCRE/Light/Char8.hs</title>
<!-- Token classes (hs-comment, hs-keyword, hs-conid, ...) used in the listing are styled by this sheet. -->
<link rel="stylesheet" type="text/css" href="hscolour.css" />
</head>
<body>
<pre><a name="line-1"></a><span class='hs-comment'>--------------------------------------------------------------------</span>
<a name="line-2"></a><span class='hs-comment'>-- |</span>
<a name="line-3"></a><span class='hs-comment'>-- Module   : Text.Regex.PCRE.Light.Char8</span>
<a name="line-4"></a><span class='hs-comment'>-- Copyright: Copyright (c) 2007-2008, Don Stewart</span>
<a name="line-5"></a><span class='hs-comment'>-- License  : BSD3</span>
<a name="line-6"></a><span class='hs-comment'>--</span>
<a name="line-7"></a><span class='hs-comment'>-- Maintainer:  Don Stewart &lt;dons@galois.com&gt;</span>
<a name="line-8"></a><span class='hs-comment'>-- Stability :  experimental</span>
<a name="line-9"></a><span class='hs-comment'>-- Portability: H98 + FFI</span>
<a name="line-10"></a><span class='hs-comment'>--</span>
<a name="line-11"></a><span class='hs-comment'>--------------------------------------------------------------------</span>
<a name="line-12"></a><span class='hs-comment'>-- </span>
<a name="line-13"></a><span class='hs-comment'>-- A simple, portable binding to perl-compatible regular expressions</span>
<a name="line-14"></a><span class='hs-comment'>-- (PCRE) via 8-bit latin1 Strings.</span>
<a name="line-15"></a><span class='hs-comment'>--</span>
<a name="line-16"></a>
<a name="line-17"></a><span class='hs-keyword'>module</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span><span class='hs-varop'>.</span><span class='hs-conid'>Char8</span> <span class='hs-layout'>(</span>
<a name="line-18"></a>
<a name="line-19"></a>        <span class='hs-comment'>-- * The abstract PCRE Regex type</span>
<a name="line-20"></a>          <span class='hs-conid'>Regex</span>
<a name="line-21"></a>
<a name="line-22"></a>        <span class='hs-comment'>-- * String interface</span>
<a name="line-23"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>compile</span><span class='hs-layout'>,</span> <span class='hs-varid'>compileM</span>
<a name="line-24"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>match</span>
<a name="line-25"></a>
<a name="line-26"></a>        <span class='hs-comment'>-- * Regex types and constructors externally visible</span>
<a name="line-27"></a>
<a name="line-28"></a>        <span class='hs-comment'>-- ** PCRE compile-time bit flags</span>
<a name="line-29"></a>        <span class='hs-layout'>,</span> <span class='hs-conid'>PCREOption</span>
<a name="line-30"></a>
<a name="line-31"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>anchored</span>
<a name="line-32"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>auto_callout</span>
<a name="line-33"></a>        <span class='hs-comment'>{-, bsr_anycrlf-}</span>
<a name="line-34"></a>        <span class='hs-comment'>{-, bsr_unicode-}</span>
<a name="line-35"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>caseless</span>
<a name="line-36"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>dollar_endonly</span>
<a name="line-37"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>dotall</span>
<a name="line-38"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>dupnames</span>
<a name="line-39"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>extended</span>
<a name="line-40"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>extra</span>
<a name="line-41"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>firstline</span>
<a name="line-42"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>multiline</span>
<a name="line-43"></a>        <span class='hs-comment'>{-, newline_any-}</span>
<a name="line-44"></a>        <span class='hs-comment'>{-, newline_anycrlf-}</span>
<a name="line-45"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>newline_cr</span>
<a name="line-46"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>newline_crlf</span>
<a name="line-47"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>newline_lf</span>
<a name="line-48"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>no_auto_capture</span>
<a name="line-49"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>ungreedy</span>
<a name="line-50"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>utf8</span>
<a name="line-51"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>no_utf8_check</span>
<a name="line-52"></a>
<a name="line-53"></a>        <span class='hs-comment'>-- ** PCRE exec-time bit flags</span>
<a name="line-54"></a>        <span class='hs-layout'>,</span> <span class='hs-conid'>PCREExecOption</span>
<a name="line-55"></a>
<a name="line-56"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_anchored</span>
<a name="line-57"></a>        <span class='hs-comment'>{-, exec_newline_any     -}</span>
<a name="line-58"></a>        <span class='hs-comment'>{-, exec_newline_anycrlf -}</span>
<a name="line-59"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_cr</span>
<a name="line-60"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_crlf</span>
<a name="line-61"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_lf</span>
<a name="line-62"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_notbol</span>
<a name="line-63"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_noteol</span>
<a name="line-64"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_notempty</span>
<a name="line-65"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_no_utf8_check</span>
<a name="line-66"></a>        <span class='hs-layout'>,</span> <span class='hs-varid'>exec_partial</span>
<a name="line-67"></a>
<a name="line-68"></a>    <span class='hs-layout'>)</span> <span class='hs-keyword'>where</span>
<a name="line-69"></a>
<a name="line-70"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span><span class='hs-varop'>.</span><span class='hs-conid'>Char8</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span>
<a name="line-71"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span>
<a name="line-72"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span> <span class='hs-varid'>hiding</span> <span class='hs-layout'>(</span><span class='hs-varid'>match</span><span class='hs-layout'>,</span> <span class='hs-varid'>compile</span><span class='hs-layout'>,</span> <span class='hs-varid'>compileM</span><span class='hs-layout'>)</span>
<a name="line-73"></a>
<a name="line-74"></a><a name="compile"></a><span class='hs-comment'>-- | 'compile'</span>
<a name="line-75"></a><span class='hs-comment'>--</span>
<a name="line-76"></a><span class='hs-comment'>-- Compile a perl-compatible regular expression, given as a String.</span>
<a name="line-77"></a><span class='hs-comment'>-- The arguments are:</span>
<a name="line-78"></a><span class='hs-comment'>--</span>
<a name="line-79"></a><span class='hs-comment'>-- * 'pat': A String containing the regular expression to be compiled;</span>
<a name="line-80"></a><span class='hs-comment'>-- it is packed into a strict ByteString before being compiled.</span>
<a name="line-81"></a><span class='hs-comment'>--</span>
<a name="line-82"></a><span class='hs-comment'>-- * 'flags', a list of optional bit flags. If the list is empty, defaults are used.</span>
<a name="line-83"></a><span class='hs-comment'>--</span>
<a name="line-84"></a><span class='hs-comment'>-- Valid compile-time flags are:</span>
<a name="line-85"></a><span class='hs-comment'>--</span>
<a name="line-86"></a><span class='hs-comment'>-- * 'anchored'        - Force pattern anchoring</span>
<a name="line-87"></a><span class='hs-comment'>--</span>
<a name="line-88"></a><span class='hs-comment'>-- * 'auto_callout'    - Compile automatic callouts</span>
<a name="line-89"></a><span class='hs-comment'>--</span>
<a name="line-90"></a><span class='hs-comment'>-- * 'bsr_anycrlf'     - \\R matches only CR, LF, or CRLF (not exported by this module)</span>
<a name="line-91"></a><span class='hs-comment'>--</span>
<a name="line-92"></a><span class='hs-comment'>-- * 'bsr_unicode'     - \\R matches all Unicode line endings (not exported by this module)</span>
<a name="line-93"></a><span class='hs-comment'>--</span>
<a name="line-94"></a><span class='hs-comment'>-- * 'caseless'        - Do caseless matching</span>
<a name="line-95"></a><span class='hs-comment'>--</span>
<a name="line-96"></a><span class='hs-comment'>-- * 'dollar_endonly'  - '$' not to match newline at end</span>
<a name="line-97"></a><span class='hs-comment'>--</span>
<a name="line-98"></a><span class='hs-comment'>-- * 'dotall'          - matches anything including NL</span>
<a name="line-99"></a><span class='hs-comment'>--</span>
<a name="line-100"></a><span class='hs-comment'>-- * 'dupnames'        - Allow duplicate names for subpatterns</span>
<a name="line-101"></a><span class='hs-comment'>--</span>
<a name="line-102"></a><span class='hs-comment'>-- * 'extended'        - Ignore whitespace and # comments</span>
<a name="line-103"></a><span class='hs-comment'>--</span>
<a name="line-104"></a><span class='hs-comment'>-- * 'extra'           - PCRE extra features (not much use currently)</span>
<a name="line-105"></a><span class='hs-comment'>--</span>
<a name="line-106"></a><span class='hs-comment'>-- * 'firstline'       - Force matching to be  before  newline</span>
<a name="line-107"></a><span class='hs-comment'>--</span>
<a name="line-108"></a><span class='hs-comment'>-- * 'multiline'       - '^' and '$' match newlines within data</span>
<a name="line-109"></a><span class='hs-comment'>--</span>
<a name="line-110"></a><span class='hs-comment'>-- * 'newline_any'     - Recognize any Unicode newline sequence (not exported by this module)</span>
<a name="line-111"></a><span class='hs-comment'>--</span>
<a name="line-112"></a><span class='hs-comment'>-- * 'newline_anycrlf' - Recognize CR, LF, and CRLF as newline sequences (not exported by this module)</span>
<a name="line-113"></a><span class='hs-comment'>--</span>
<a name="line-114"></a><span class='hs-comment'>-- * 'newline_cr'      - Set CR as the newline sequence</span>
<a name="line-115"></a><span class='hs-comment'>--</span>
<a name="line-116"></a><span class='hs-comment'>-- * 'newline_crlf'    - Set CRLF as the newline sequence</span>
<a name="line-117"></a><span class='hs-comment'>--</span>
<a name="line-118"></a><span class='hs-comment'>-- * 'newline_lf'      - Set LF as the newline sequence</span>
<a name="line-119"></a><span class='hs-comment'>--</span>
<a name="line-120"></a><span class='hs-comment'>-- * 'no_auto_capture' - Disable numbered capturing parentheses (named ones available)</span>
<a name="line-121"></a><span class='hs-comment'>--</span>
<a name="line-122"></a><span class='hs-comment'>-- * 'ungreedy'        - Invert greediness of quantifiers</span>
<a name="line-123"></a><span class='hs-comment'>--</span>
<a name="line-124"></a><span class='hs-comment'>-- * 'utf8'            - Run in UTF-8 mode</span>
<a name="line-125"></a><span class='hs-comment'>--</span>
<a name="line-126"></a><span class='hs-comment'>-- * 'no_utf8_check'   - Do not check the pattern for UTF-8 validity</span>
<a name="line-127"></a><span class='hs-comment'>--</span>
<a name="line-128"></a><span class='hs-comment'>-- This function returns the compiled regular expression directly, as</span>
<a name="line-129"></a><span class='hs-comment'>-- an abstract 'Regex' value, so a failed compilation cannot be reported</span>
<a name="line-130"></a><span class='hs-comment'>-- through a 'Left' constructor; use 'compileM' to get the error string.</span>
<a name="line-131"></a><span class='hs-comment'>-- The regex is allocated via malloc on the C side, and will be</span>
<a name="line-132"></a><span class='hs-comment'>-- deallocated by the runtime when the Haskell value representing it</span>
<a name="line-133"></a><span class='hs-comment'>-- goes out of scope.</span>
<a name="line-134"></a><span class='hs-comment'>--</span>
<a name="line-135"></a><span class='hs-comment'>-- As regexes are often defined statically, GHC will compile them </span>
<a name="line-136"></a><span class='hs-comment'>-- to null-terminated, strict C strings, enabling compilation of the </span>
<a name="line-137"></a><span class='hs-comment'>-- pattern without copying. This may be useful for very large patterns.</span>
<a name="line-138"></a><span class='hs-comment'>--</span>
<a name="line-139"></a><span class='hs-comment'>-- See man pcreapi for more details.</span>
<a name="line-140"></a><span class='hs-comment'>--</span>
<a name="line-141"></a><span class='hs-definition'>compile</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>String</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Regex</span>
<a name="line-142"></a><span class='hs-definition'>compile</span> <span class='hs-varid'>pat</span> <span class='hs-varid'>flags</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>compile</span> <span class='hs-layout'>(</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>pack</span> <span class='hs-varid'>pat</span><span class='hs-layout'>)</span> <span class='hs-varid'>flags</span>
<a name="line-143"></a><span class='hs-comment'>{-# INLINE compile #-}</span>
<a name="line-144"></a>
<a name="line-145"></a><a name="compileM"></a><span class='hs-comment'>-- | 'compileM'</span>
<a name="line-146"></a><span class='hs-comment'>-- A safe version of 'compile' with failure lifted into an Either</span>
<a name="line-147"></a><span class='hs-definition'>compileM</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>String</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Either</span> <span class='hs-conid'>String</span> <span class='hs-conid'>Regex</span>
<a name="line-148"></a><span class='hs-definition'>compileM</span> <span class='hs-varid'>pat</span> <span class='hs-varid'>flags</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>compileM</span> <span class='hs-layout'>(</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>pack</span> <span class='hs-varid'>pat</span><span class='hs-layout'>)</span> <span class='hs-varid'>flags</span>
<a name="line-149"></a><span class='hs-comment'>{-# INLINE compileM #-}</span>
<a name="line-150"></a>
<a name="line-151"></a>
<a name="line-152"></a><a name="match"></a><span class='hs-comment'>-- | 'match'</span>
<a name="line-153"></a><span class='hs-comment'>--</span>
<a name="line-154"></a><span class='hs-comment'>-- Matches a compiled regular expression against a given subject string,</span>
<a name="line-155"></a><span class='hs-comment'>-- using a matching algorithm that is similar to Perl's. If the subject</span>
<a name="line-156"></a><span class='hs-comment'>-- string doesn't match the regular expression, 'Nothing' is returned,</span>
<a name="line-157"></a><span class='hs-comment'>-- otherwise the portion of the string that matched is returned, along</span>
<a name="line-158"></a><span class='hs-comment'>-- with any captured subpatterns.</span>
<a name="line-159"></a><span class='hs-comment'>--</span>
<a name="line-160"></a><span class='hs-comment'>-- The arguments are:</span>
<a name="line-161"></a><span class='hs-comment'>--</span>
<a name="line-162"></a><span class='hs-comment'>-- * 'regex', a PCRE regular expression value produced by compile</span>
<a name="line-163"></a><span class='hs-comment'>--</span>
<a name="line-164"></a><span class='hs-comment'>-- * 'subject', the subject string to match against</span>
<a name="line-165"></a><span class='hs-comment'>--</span>
<a name="line-166"></a><span class='hs-comment'>-- * 'options', an optional set of exec-time flags to exec.</span>
<a name="line-167"></a><span class='hs-comment'>--</span>
<a name="line-168"></a><span class='hs-comment'>-- Available runtime options are:</span>
<a name="line-169"></a><span class='hs-comment'>--</span>
<a name="line-170"></a><span class='hs-comment'>-- * 'exec_anchored'        - Match only at the first position</span>
<a name="line-171"></a><span class='hs-comment'>--</span>
<a name="line-172"></a><span class='hs-comment'>-- * 'bsr_anycrlf'          - '\\R' matches only CR, LF, or CRLF (not exported by this module)</span>
<a name="line-173"></a><span class='hs-comment'>--</span>
<a name="line-174"></a><span class='hs-comment'>-- * 'bsr_unicode'          - '\\R' matches all Unicode line endings (not exported by this module)</span>
<a name="line-175"></a><span class='hs-comment'>--</span>
<a name="line-176"></a><span class='hs-comment'>-- * 'exec_newline_any'     - Recognize any Unicode newline sequence (not exported by this module)</span>
<a name="line-177"></a><span class='hs-comment'>--</span>
<a name="line-178"></a><span class='hs-comment'>-- * 'exec_newline_anycrlf' - Recognize CR, LF, and CRLF as newline sequences (not exported by this module)</span>
<a name="line-179"></a><span class='hs-comment'>--</span>
<a name="line-180"></a><span class='hs-comment'>-- * 'exec_newline_cr'      - Set CR as the newline sequence</span>
<a name="line-181"></a><span class='hs-comment'>--</span>
<a name="line-182"></a><span class='hs-comment'>-- * 'exec_newline_crlf'    - Set CRLF as the newline sequence</span>
<a name="line-183"></a><span class='hs-comment'>--</span>
<a name="line-184"></a><span class='hs-comment'>-- * 'exec_newline_lf'      - Set LF as the newline sequence</span>
<a name="line-185"></a><span class='hs-comment'>--</span>
<a name="line-186"></a><span class='hs-comment'>-- * 'exec_notbol'          - Subject is not the beginning of a line</span>
<a name="line-187"></a><span class='hs-comment'>--</span>
<a name="line-188"></a><span class='hs-comment'>-- * 'exec_noteol'          - Subject is not the end of a line</span>
<a name="line-189"></a><span class='hs-comment'>--</span>
<a name="line-190"></a><span class='hs-comment'>-- * 'exec_notempty'        - An empty string is not a valid match</span>
<a name="line-191"></a><span class='hs-comment'>--</span>
<a name="line-192"></a><span class='hs-comment'>-- * 'exec_no_utf8_check'   - Do not check the subject for UTF-8</span>
<a name="line-193"></a><span class='hs-comment'>--</span>
<a name="line-194"></a><span class='hs-comment'>-- * 'exec_partial'         - Return PCRE_ERROR_PARTIAL for a partial match</span>
<a name="line-195"></a><span class='hs-comment'>--</span>
<a name="line-196"></a><span class='hs-comment'>-- The result value, and any captured subpatterns, are returned.</span>
<a name="line-197"></a><span class='hs-comment'>-- If the regex is invalid, or the subject string is empty, Nothing</span>
<a name="line-198"></a><span class='hs-comment'>-- is returned.</span>
<a name="line-199"></a><span class='hs-comment'>--</span>
<a name="line-200"></a><span class='hs-definition'>match</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Regex</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>String</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREExecOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-&gt;</span> <span class='hs-conid'>Maybe</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>String</span><span class='hs-keyglyph'>]</span>
<a name="line-201"></a><span class='hs-definition'>match</span> <span class='hs-varid'>regex</span> <span class='hs-varid'>subject</span> <span class='hs-varid'>options</span> <span class='hs-keyglyph'>=</span>
<a name="line-202"></a>    <span class='hs-varid'>fmap</span> <span class='hs-layout'>(</span><span class='hs-varid'>map</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>unpack</span><span class='hs-layout'>)</span> <span class='hs-varid'>matched</span>
<a name="line-203"></a>  <span class='hs-keyword'>where</span>
<a name="line-204"></a>    <span class='hs-varid'>matched</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>match</span> <span class='hs-varid'>regex</span> <span class='hs-layout'>(</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>pack</span> <span class='hs-varid'>subject</span><span class='hs-layout'>)</span> <span class='hs-varid'>options</span>
<a name="line-205"></a><span class='hs-comment'>{-# INLINE match #-}</span>
</pre></body>
</html>