<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html> <head> <!-- Generated by HsColour, http://www.cs.york.ac.uk/fp/darcs/hscolour/ --> <title>Text/Regex/PCRE/Light/Char8.hs</title> <link type='text/css' rel='stylesheet' href='hscolour.css' /> </head> <body> <pre><a name="line-1"></a><span class='hs-comment'>--------------------------------------------------------------------</span> <a name="line-2"></a><span class='hs-comment'>-- |</span> <a name="line-3"></a><span class='hs-comment'>-- Module : Text.Regex.PCRE.Light.Char8</span> <a name="line-4"></a><span class='hs-comment'>-- Copyright: Copyright (c) 2007-2008, Don Stewart</span> <a name="line-5"></a><span class='hs-comment'>-- License : BSD3</span> <a name="line-6"></a><span class='hs-comment'>--</span> <a name="line-7"></a><span class='hs-comment'>-- Maintainer: Don Stewart &lt;dons@galois.com&gt;</span> <a name="line-8"></a><span class='hs-comment'>-- Stability : experimental</span> <a name="line-9"></a><span class='hs-comment'>-- Portability: H98 + FFI</span> <a name="line-10"></a><span class='hs-comment'>--</span> <a name="line-11"></a><span class='hs-comment'>--------------------------------------------------------------------</span> <a name="line-12"></a><span class='hs-comment'>-- </span> <a name="line-13"></a><span class='hs-comment'>-- A simple, portable binding to perl-compatible regular expressions</span> <a name="line-14"></a><span class='hs-comment'>-- (PCRE) via 8-bit latin1 Strings.</span> <a name="line-15"></a><span class='hs-comment'>--</span> <a name="line-16"></a> <a name="line-17"></a><span class='hs-keyword'>module</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span><span class='hs-varop'>.</span><span 
class='hs-conid'>Char8</span> <span class='hs-layout'>(</span> <a name="line-18"></a> <a name="line-19"></a> <span class='hs-comment'>-- * The abstract PCRE Regex type</span> <a name="line-20"></a> <span class='hs-conid'>Regex</span> <a name="line-21"></a> <a name="line-22"></a> <span class='hs-comment'>-- * String interface</span> <a name="line-23"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>compile</span><span class='hs-layout'>,</span> <span class='hs-varid'>compileM</span> <a name="line-24"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>match</span> <a name="line-25"></a> <a name="line-26"></a> <span class='hs-comment'>-- * Regex types and constructors externally visible</span> <a name="line-27"></a> <a name="line-28"></a> <span class='hs-comment'>-- ** PCRE compile-time bit flags</span> <a name="line-29"></a> <span class='hs-layout'>,</span> <span class='hs-conid'>PCREOption</span> <a name="line-30"></a> <a name="line-31"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>anchored</span> <a name="line-32"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>auto_callout</span> <a name="line-33"></a> <span class='hs-comment'>{-, bsr_anycrlf-}</span> <a name="line-34"></a> <span class='hs-comment'>{-, bsr_unicode-}</span> <a name="line-35"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>caseless</span> <a name="line-36"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>dollar_endonly</span> <a name="line-37"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>dotall</span> <a name="line-38"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>dupnames</span> <a name="line-39"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>extended</span> <a name="line-40"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>extra</span> <a name="line-41"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>firstline</span> <a name="line-42"></a> <span 
class='hs-layout'>,</span> <span class='hs-varid'>multiline</span> <a name="line-43"></a> <span class='hs-comment'>{-, newline_any-}</span> <a name="line-44"></a> <span class='hs-comment'>{-, newline_anycrlf-}</span> <a name="line-45"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>newline_cr</span> <a name="line-46"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>newline_crlf</span> <a name="line-47"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>newline_lf</span> <a name="line-48"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>no_auto_capture</span> <a name="line-49"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>ungreedy</span> <a name="line-50"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>utf8</span> <a name="line-51"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>no_utf8_check</span> <a name="line-52"></a> <a name="line-53"></a> <span class='hs-comment'>-- ** PCRE exec-time bit flags</span> <a name="line-54"></a> <span class='hs-layout'>,</span> <span class='hs-conid'>PCREExecOption</span> <a name="line-55"></a> <a name="line-56"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_anchored</span> <a name="line-57"></a> <span class='hs-comment'>{-, exec_newline_any -}</span> <a name="line-58"></a> <span class='hs-comment'>{-, exec_newline_anycrlf -}</span> <a name="line-59"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_cr</span> <a name="line-60"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_crlf</span> <a name="line-61"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_lf</span> <a name="line-62"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_notbol</span> <a name="line-63"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_noteol</span> <a name="line-64"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_notempty</span> <a 
name="line-65"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_no_utf8_check</span> <a name="line-66"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_partial</span> <a name="line-67"></a> <a name="line-68"></a> <span class='hs-layout'>)</span> <span class='hs-keyword'>where</span> <a name="line-69"></a> <a name="line-70"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span><span class='hs-varop'>.</span><span class='hs-conid'>Char8</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span> <a name="line-71"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span> <a name="line-72"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span> <span class='hs-varid'>hiding</span> <span class='hs-layout'>(</span><span class='hs-varid'>match</span><span class='hs-layout'>,</span> <span class='hs-varid'>compile</span><span class='hs-layout'>,</span> <span class='hs-varid'>compileM</span><span class='hs-layout'>)</span> <a name="line-73"></a> <a name="line-74"></a><a name="compile"></a><span class='hs-comment'>-- | 'compile'</span> <a name="line-75"></a><span class='hs-comment'>--</span> <a name="line-76"></a><span class='hs-comment'>-- Compile a perl-compatible regular expression, in a strict bytestring.</span> <a name="line-77"></a><span 
class='hs-comment'>-- The arguments are:</span> <a name="line-78"></a><span class='hs-comment'>--</span> <a name="line-79"></a><span class='hs-comment'>-- * 'pat': A ByteString, which may or may not be zero-terminated,</span> <a name="line-80"></a><span class='hs-comment'>-- containing the regular expression to be compiled. </span> <a name="line-81"></a><span class='hs-comment'>--</span> <a name="line-82"></a><span class='hs-comment'>-- * 'flags', optional bit flags. If 'Nothing' is provided, defaults are used.</span> <a name="line-83"></a><span class='hs-comment'>--</span> <a name="line-84"></a><span class='hs-comment'>-- Valid compile-time flags are:</span> <a name="line-85"></a><span class='hs-comment'>--</span> <a name="line-86"></a><span class='hs-comment'>-- * 'anchored' - Force pattern anchoring</span> <a name="line-87"></a><span class='hs-comment'>--</span> <a name="line-88"></a><span class='hs-comment'>-- * 'auto_callout' - Compile automatic callouts</span> <a name="line-89"></a><span class='hs-comment'>--</span> <a name="line-90"></a><span class='hs-comment'>-- * 'bsr_anycrlf' - \\R matches only CR, LF, or CRLF</span> <a name="line-91"></a><span class='hs-comment'>--</span> <a name="line-92"></a><span class='hs-comment'>-- * 'bsr_unicode' - \\R matches all Unicode line endings</span> <a name="line-93"></a><span class='hs-comment'>--</span> <a name="line-94"></a><span class='hs-comment'>-- * 'caseless' - Do caseless matching</span> <a name="line-95"></a><span class='hs-comment'>--</span> <a name="line-96"></a><span class='hs-comment'>-- * 'dollar_endonly' - '$' not to match newline at end</span> <a name="line-97"></a><span class='hs-comment'>--</span> <a name="line-98"></a><span class='hs-comment'>-- * 'dotall' - matches anything including NL</span> <a name="line-99"></a><span class='hs-comment'>--</span> <a name="line-100"></a><span class='hs-comment'>-- * 'dupnames' - Allow duplicate names for subpatterns</span> <a name="line-101"></a><span 
class='hs-comment'>--</span> <a name="line-102"></a><span class='hs-comment'>-- * 'extended' - Ignore whitespace and # comments</span> <a name="line-103"></a><span class='hs-comment'>--</span> <a name="line-104"></a><span class='hs-comment'>-- * 'extra' - PCRE extra features (not much use currently)</span> <a name="line-105"></a><span class='hs-comment'>--</span> <a name="line-106"></a><span class='hs-comment'>-- * 'firstline' - Force matching to be before newline</span> <a name="line-107"></a><span class='hs-comment'>--</span> <a name="line-108"></a><span class='hs-comment'>-- * 'multiline' - '^' and '$' match newlines within data</span> <a name="line-109"></a><span class='hs-comment'>--</span> <a name="line-110"></a><span class='hs-comment'>-- * 'newline_any' - Recognize any Unicode newline sequence</span> <a name="line-111"></a><span class='hs-comment'>--</span> <a name="line-112"></a><span class='hs-comment'>-- * 'newline_anycrlf' - Recognize CR, LF, and CRLF as newline sequences</span> <a name="line-113"></a><span class='hs-comment'>--</span> <a name="line-114"></a><span class='hs-comment'>-- * 'newline_cr' - Set CR as the newline sequence</span> <a name="line-115"></a><span class='hs-comment'>--</span> <a name="line-116"></a><span class='hs-comment'>-- * 'newline_crlf' - Set CRLF as the newline sequence</span> <a name="line-117"></a><span class='hs-comment'>--</span> <a name="line-118"></a><span class='hs-comment'>-- * 'newline_lf' - Set LF as the newline sequence</span> <a name="line-119"></a><span class='hs-comment'>--</span> <a name="line-120"></a><span class='hs-comment'>-- * 'no_auto_capture' - Disable numbered capturing parentheses (named ones available)</span> <a name="line-121"></a><span class='hs-comment'>--</span> <a name="line-122"></a><span class='hs-comment'>-- * 'ungreedy' - Invert greediness of quantifiers</span> <a name="line-123"></a><span class='hs-comment'>--</span> <a name="line-124"></a><span class='hs-comment'>-- * 'utf8' - Run in UTF-8 
mode</span> <a name="line-125"></a><span class='hs-comment'>--</span> <a name="line-126"></a><span class='hs-comment'>-- * 'no_utf8_check' - Do not check the pattern for UTF-8 validity</span> <a name="line-127"></a><span class='hs-comment'>--</span> <a name="line-128"></a><span class='hs-comment'>-- If compilation of the pattern fails, the 'Left' constructor is </span> <a name="line-129"></a><span class='hs-comment'>-- returned with the error string. Otherwise an abstract type</span> <a name="line-130"></a><span class='hs-comment'>-- representing the compiled regular expression is returned.</span> <a name="line-131"></a><span class='hs-comment'>-- The regex is allocated via malloc on the C side, and will be</span> <a name="line-132"></a><span class='hs-comment'>-- deallocated by the runtime when the Haskell value representing it</span> <a name="line-133"></a><span class='hs-comment'>-- goes out of scope.</span> <a name="line-134"></a><span class='hs-comment'>--</span> <a name="line-135"></a><span class='hs-comment'>-- As regexes are often defined statically, GHC will compile them </span> <a name="line-136"></a><span class='hs-comment'>-- to null-terminated, strict C strings, enabling compilation of the </span> <a name="line-137"></a><span class='hs-comment'>-- pattern without copying. 
This may be useful for very large patterns.</span> <a name="line-138"></a><span class='hs-comment'>--</span> <a name="line-139"></a><span class='hs-comment'>-- See man pcreapi for more details.</span> <a name="line-140"></a><span class='hs-comment'>--</span> <a name="line-141"></a><span class='hs-definition'>compile</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>String</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Regex</span> <a name="line-142"></a><span class='hs-definition'>compile</span> <span class='hs-varid'>str</span> <span class='hs-varid'>os</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>compile</span> <span class='hs-layout'>(</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>pack</span> <span class='hs-varid'>str</span><span class='hs-layout'>)</span> <span class='hs-varid'>os</span> <a name="line-143"></a><span class='hs-comment'>{-# INLINE compile #-}</span> <a name="line-144"></a> <a name="line-145"></a><a name="compileM"></a><span class='hs-comment'>-- | 'compileM'</span> <a name="line-146"></a><span class='hs-comment'>-- A safe version of 'compile' with failure lifted into an Either</span> <a name="line-147"></a><span class='hs-definition'>compileM</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>String</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Either</span> <span class='hs-conid'>String</span> <span class='hs-conid'>Regex</span> <a name="line-148"></a><span class='hs-definition'>compileM</span> <span class='hs-varid'>str</span> <span class='hs-varid'>os</span> <span 
class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>compileM</span> <span class='hs-layout'>(</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>pack</span> <span class='hs-varid'>str</span><span class='hs-layout'>)</span> <span class='hs-varid'>os</span> <a name="line-149"></a><span class='hs-comment'>{-# INLINE compileM #-}</span> <a name="line-150"></a> <a name="line-151"></a> <a name="line-152"></a><a name="match"></a><span class='hs-comment'>-- | 'match'</span> <a name="line-153"></a><span class='hs-comment'>--</span> <a name="line-154"></a><span class='hs-comment'>-- Matches a compiled regular expression against a given subject string,</span> <a name="line-155"></a><span class='hs-comment'>-- using a matching algorithm that is similar to Perl's. If the subject</span> <a name="line-156"></a><span class='hs-comment'>-- string doesn't match the regular expression, 'Nothing' is returned,</span> <a name="line-157"></a><span class='hs-comment'>-- otherwise the portion of the string that matched is returned, along</span> <a name="line-158"></a><span class='hs-comment'>-- with any captured subpatterns.</span> <a name="line-159"></a><span class='hs-comment'>--</span> <a name="line-160"></a><span class='hs-comment'>-- The arguments are:</span> <a name="line-161"></a><span class='hs-comment'>--</span> <a name="line-162"></a><span class='hs-comment'>-- * 'regex', a PCRE regular expression value produced by compile</span> <a name="line-163"></a><span class='hs-comment'>--</span> <a name="line-164"></a><span class='hs-comment'>-- * 'subject', the subject string to match against</span> <a name="line-165"></a><span class='hs-comment'>--</span> <a name="line-166"></a><span class='hs-comment'>-- * 'options', an optional set of exec-time flags to exec.</span> <a name="line-167"></a><span class='hs-comment'>--</span> <a name="line-168"></a><span class='hs-comment'>-- Available 
runtime options are:</span> <a name="line-169"></a><span class='hs-comment'>--</span> <a name="line-170"></a><span class='hs-comment'>-- * 'anchored' - Match only at the first position</span> <a name="line-171"></a><span class='hs-comment'>--</span> <a name="line-172"></a><span class='hs-comment'>-- * 'bsr_anycrlf' - '\\R' matches only CR, LF, or CRLF</span> <a name="line-173"></a><span class='hs-comment'>--</span> <a name="line-174"></a><span class='hs-comment'>-- * 'bsr_unicode' - '\\R' matches all Unicode line endings</span> <a name="line-175"></a><span class='hs-comment'>--</span> <a name="line-176"></a><span class='hs-comment'>-- * 'newline_any' - Recognize any Unicode newline sequence</span> <a name="line-177"></a><span class='hs-comment'>--</span> <a name="line-178"></a><span class='hs-comment'>-- * 'newline_anycrlf' - Recognize CR, LF, and CRLF as newline sequences</span> <a name="line-179"></a><span class='hs-comment'>--</span> <a name="line-180"></a><span class='hs-comment'>-- * 'newline_cr' - Set CR as the newline sequence</span> <a name="line-181"></a><span class='hs-comment'>--</span> <a name="line-182"></a><span class='hs-comment'>-- * 'newline_crlf' - Set CRLF as the newline sequence</span> <a name="line-183"></a><span class='hs-comment'>--</span> <a name="line-184"></a><span class='hs-comment'>-- * 'newline_lf' - Set LF as the newline sequence</span> <a name="line-185"></a><span class='hs-comment'>--</span> <a name="line-186"></a><span class='hs-comment'>-- * 'notbol' - Subject is not the beginning of a line</span> <a name="line-187"></a><span class='hs-comment'>--</span> <a name="line-188"></a><span class='hs-comment'>-- * 'noteol' - Subject is not the end of a line</span> <a name="line-189"></a><span class='hs-comment'>--</span> <a name="line-190"></a><span class='hs-comment'>-- * 'notempty' - An empty string is not a valid match</span> <a name="line-191"></a><span class='hs-comment'>--</span> <a name="line-192"></a><span class='hs-comment'>-- * 
'no_utf8_check' - Do not check the subject for UTF-8</span> <a name="line-193"></a><span class='hs-comment'>--</span> <a name="line-194"></a><span class='hs-comment'>-- * 'partial' - Return PCRE_ERROR_PARTIAL for a partial match</span> <a name="line-195"></a><span class='hs-comment'>--</span> <a name="line-196"></a><span class='hs-comment'>-- The result value, and any captured subpatterns, are returned.</span> <a name="line-197"></a><span class='hs-comment'>-- If the regex is invalid, or the subject string is empty, Nothing</span> <a name="line-198"></a><span class='hs-comment'>-- is returned.</span> <a name="line-199"></a><span class='hs-comment'>--</span> <a name="line-200"></a><span class='hs-definition'>match</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Regex</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>String</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREExecOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Maybe</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>String</span><span class='hs-keyglyph'>]</span> <a name="line-201"></a><span class='hs-definition'>match</span> <span class='hs-varid'>r</span> <span class='hs-varid'>subject</span> <span class='hs-varid'>os</span> <span class='hs-keyglyph'>=</span> <a name="line-202"></a> <span class='hs-keyword'>case</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>match</span> <span class='hs-varid'>r</span> <span class='hs-layout'>(</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>pack</span> <span class='hs-varid'>subject</span><span class='hs-layout'>)</span> <span class='hs-varid'>os</span> <span class='hs-keyword'>of</span> <a name="line-203"></a> <span class='hs-conid'>Nothing</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Nothing</span> <a 
name="line-204"></a> <span class='hs-conid'>Just</span> <span class='hs-varid'>x</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Just</span> <span class='hs-layout'>(</span><span class='hs-varid'>map</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>unpack</span> <span class='hs-varid'>x</span><span class='hs-layout'>)</span> <a name="line-205"></a><span class='hs-comment'>{-# INLINE match #-}</span> </pre></body> </html>