<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html> <head> <!-- Generated by HsColour, http://www.cs.york.ac.uk/fp/darcs/hscolour/ --> <title>Text/Regex/PCRE/Light.hs</title> <link type='text/css' rel='stylesheet' href='hscolour.css' /> </head> <body> <pre><a name="line-1"></a><span class='hs-comment'>{-# LANGUAGE CPP #-}</span> <a name="line-2"></a><span class='hs-comment'>--------------------------------------------------------------------</span> <a name="line-3"></a><span class='hs-comment'>-- |</span> <a name="line-4"></a><span class='hs-comment'>-- Module : Text.Regex.PCRE.Light</span> <a name="line-5"></a><span class='hs-comment'>-- Copyright: Copyright (c) 2007-2008, Don Stewart</span> <a name="line-6"></a><span class='hs-comment'>-- License : BSD3</span> <a name="line-7"></a><span class='hs-comment'>--</span> <a name="line-8"></a><span class='hs-comment'>-- Maintainer: Don Stewart &lt;dons@galois.com&gt;</span> <a name="line-9"></a><span class='hs-comment'>-- Stability : experimental</span> <a name="line-10"></a><span class='hs-comment'>-- Portability: H98 + CPP</span> <a name="line-11"></a><span class='hs-comment'>--</span> <a name="line-12"></a><span class='hs-comment'>--------------------------------------------------------------------</span> <a name="line-13"></a><span class='hs-comment'>-- </span> <a name="line-14"></a><span class='hs-comment'>-- A simple, portable binding to perl-compatible regular expressions</span> <a name="line-15"></a><span class='hs-comment'>-- (PCRE) via strict ByteStrings.</span> <a name="line-16"></a><span class='hs-comment'>--</span> <a name="line-17"></a> <a name="line-18"></a><span class='hs-keyword'>module</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span
class='hs-conid'>Light</span> <span class='hs-layout'>(</span> <a name="line-19"></a> <a name="line-20"></a> <span class='hs-comment'>-- * The abstract PCRE Regex type</span> <a name="line-21"></a> <span class='hs-conid'>Regex</span> <a name="line-22"></a> <a name="line-23"></a> <span class='hs-comment'>-- * ByteString interface</span> <a name="line-24"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>compile</span><span class='hs-layout'>,</span> <span class='hs-varid'>compileM</span> <a name="line-25"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>match</span> <a name="line-26"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>captureCount</span> <a name="line-27"></a> <a name="line-28"></a> <span class='hs-comment'>-- * Regex types and constructors externally visible</span> <a name="line-29"></a> <a name="line-30"></a> <span class='hs-comment'>-- ** PCRE compile-time bit flags</span> <a name="line-31"></a> <span class='hs-layout'>,</span> <span class='hs-conid'>PCREOption</span> <a name="line-32"></a> <a name="line-33"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>anchored</span> <a name="line-34"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>auto_callout</span> <a name="line-35"></a> <span class='hs-comment'>{-, bsr_anycrlf-}</span> <a name="line-36"></a> <span class='hs-comment'>{-, bsr_unicode-}</span> <a name="line-37"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>caseless</span> <a name="line-38"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>dollar_endonly</span> <a name="line-39"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>dotall</span> <a name="line-40"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>dupnames</span> <a name="line-41"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>extended</span> <a name="line-42"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>extra</span> <a name="line-43"></a> <span 
class='hs-layout'>,</span> <span class='hs-varid'>firstline</span> <a name="line-44"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>multiline</span> <a name="line-45"></a> <span class='hs-comment'>{-, newline_any-}</span> <a name="line-46"></a> <span class='hs-comment'>{-, newline_anycrlf-}</span> <a name="line-47"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>newline_cr</span> <a name="line-48"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>newline_crlf</span> <a name="line-49"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>newline_lf</span> <a name="line-50"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>no_auto_capture</span> <a name="line-51"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>ungreedy</span> <a name="line-52"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>utf8</span> <a name="line-53"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>no_utf8_check</span> <a name="line-54"></a> <a name="line-55"></a> <span class='hs-comment'>-- ** PCRE exec-time bit flags</span> <a name="line-56"></a> <span class='hs-layout'>,</span> <span class='hs-conid'>PCREExecOption</span> <a name="line-57"></a> <a name="line-58"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_anchored</span> <a name="line-59"></a> <span class='hs-comment'>{-, exec_newline_any -}</span> <a name="line-60"></a> <span class='hs-comment'>{-, exec_newline_anycrlf -}</span> <a name="line-61"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_cr</span> <a name="line-62"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_crlf</span> <a name="line-63"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_newline_lf</span> <a name="line-64"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_notbol</span> <a name="line-65"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_noteol</span> <a 
name="line-66"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_notempty</span> <a name="line-67"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_no_utf8_check</span> <a name="line-68"></a> <span class='hs-layout'>,</span> <span class='hs-varid'>exec_partial</span> <a name="line-69"></a> <a name="line-70"></a> <span class='hs-layout'>)</span> <span class='hs-keyword'>where</span> <a name="line-71"></a> <a name="line-72"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Text</span><span class='hs-varop'>.</span><span class='hs-conid'>Regex</span><span class='hs-varop'>.</span><span class='hs-conid'>PCRE</span><span class='hs-varop'>.</span><span class='hs-conid'>Light</span><span class='hs-varop'>.</span><span class='hs-conid'>Base</span> <a name="line-73"></a> <a name="line-74"></a><span class='hs-comment'>-- Strings</span> <a name="line-75"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span> <a name="line-76"></a> <a name="line-77"></a><span class='hs-cpp'>#if __GLASGOW_HASKELL__ >= 608</span> <a name="line-78"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span><span class='hs-varop'>.</span><span class='hs-conid'>Internal</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span> <a name="line-79"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span><span class='hs-varop'>.</span><span class='hs-conid'>Unsafe</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span> <a name="line-80"></a><span 
class='hs-cpp'>#else</span> <a name="line-81"></a><span class='hs-keyword'>import</span> <span class='hs-keyword'>qualified</span> <span class='hs-conid'>Data</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span><span class='hs-varop'>.</span><span class='hs-conid'>Base</span> <span class='hs-keyword'>as</span> <span class='hs-conid'>S</span> <a name="line-82"></a><span class='hs-cpp'>#endif</span> <a name="line-83"></a> <a name="line-84"></a><span class='hs-comment'>-- Foreigns</span> <a name="line-85"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span> <a name="line-86"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>Ptr</span> <a name="line-87"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>C</span><span class='hs-varop'>.</span><span class='hs-conid'>Types</span> <a name="line-88"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>C</span><span class='hs-varop'>.</span><span class='hs-conid'>String</span> <a name="line-89"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>Storable</span> <a name="line-90"></a><span class='hs-keyword'>import</span> <span class='hs-conid'>Foreign</span><span class='hs-varop'>.</span><span class='hs-conid'>Marshal</span><span class='hs-varop'>.</span><span class='hs-conid'>Alloc</span> <a name="line-91"></a> <a name="line-92"></a><a name="compile"></a><span class='hs-comment'>-- | 'compile'</span> <a name="line-93"></a><span class='hs-comment'>--</span> <a name="line-94"></a><span class='hs-comment'>-- Compile a perl-compatible regular expression stored in a strict bytestring.</span> <a name="line-95"></a><span class='hs-comment'>--</span> <a 
name="line-96"></a><span class='hs-comment'>-- An example</span> <a name="line-97"></a><span class='hs-comment'>--</span> <a name="line-98"></a><span class='hs-comment'>-- > let r = compile (pack "^(b+|a){1,2}?bc") []</span> <a name="line-99"></a><span class='hs-comment'>--</span> <a name="line-100"></a><span class='hs-comment'>-- Or using GHC's -XOverloadedStrings flag, and importing</span> <a name="line-101"></a><span class='hs-comment'>-- Data.ByteString.Char8, we can avoid the pack:</span> <a name="line-102"></a><span class='hs-comment'>--</span> <a name="line-103"></a><span class='hs-comment'>-- > let r = compile "^(b+|a){1,2}?bc" []</span> <a name="line-104"></a><span class='hs-comment'>--</span> <a name="line-105"></a><span class='hs-comment'>-- If the regular expression is invalid, an exception is thrown.</span> <a name="line-106"></a><span class='hs-comment'>-- If this is unsuitable, 'compileM' is available, which returns failure </span> <a name="line-107"></a><span class='hs-comment'>-- in a monad.</span> <a name="line-108"></a><span class='hs-comment'>--</span> <a name="line-109"></a><span class='hs-comment'>-- To do case insensitive matching,</span> <a name="line-110"></a><span class='hs-comment'>--</span> <a name="line-111"></a><span class='hs-comment'>-- > compile "^(b+|a){1,2}?bc" [caseless]</span> <a name="line-112"></a><span class='hs-comment'>--</span> <a name="line-113"></a><span class='hs-comment'>-- Other flags are documented below.</span> <a name="line-114"></a><span class='hs-comment'>--</span> <a name="line-115"></a><span class='hs-comment'>-- The resulting abstract regular expression can be passed to 'match'</span> <a name="line-116"></a><span class='hs-comment'>-- for matching against a subject string.</span> <a name="line-117"></a><span class='hs-comment'>--</span> <a name="line-118"></a><span class='hs-comment'>-- The arguments are:</span> <a name="line-119"></a><span class='hs-comment'>--</span> <a name="line-120"></a><span
class='hs-comment'>-- * 'pat': A ByteString containing the regular expression to be compiled. </span> <a name="line-121"></a><span class='hs-comment'>--</span> <a name="line-122"></a><span class='hs-comment'>-- * 'flags', optional bit flags. If 'Nothing' is provided, defaults are used.</span> <a name="line-123"></a><span class='hs-comment'>--</span> <a name="line-124"></a><span class='hs-comment'>-- Valid compile-time flags are:</span> <a name="line-125"></a><span class='hs-comment'>--</span> <a name="line-126"></a><span class='hs-comment'>-- * 'anchored' - Force pattern anchoring</span> <a name="line-127"></a><span class='hs-comment'>--</span> <a name="line-128"></a><span class='hs-comment'>-- * 'auto_callout' - Compile automatic callouts</span> <a name="line-129"></a><span class='hs-comment'>--</span> <a name="line-130"></a><span class='hs-comment'>-- * 'bsr_anycrlf' - \\R matches only CR, LF, or CRLF</span> <a name="line-131"></a><span class='hs-comment'>--</span> <a name="line-132"></a><span class='hs-comment'>-- * 'bsr_unicode' - \\R matches all Unicode line endings</span> <a name="line-133"></a><span class='hs-comment'>--</span> <a name="line-134"></a><span class='hs-comment'>-- * 'caseless' - Do caseless matching</span> <a name="line-135"></a><span class='hs-comment'>--</span> <a name="line-136"></a><span class='hs-comment'>-- * 'dollar_endonly' - '$' not to match newline at end</span> <a name="line-137"></a><span class='hs-comment'>--</span> <a name="line-138"></a><span class='hs-comment'>-- * 'dotall' - matches anything including NL</span> <a name="line-139"></a><span class='hs-comment'>--</span> <a name="line-140"></a><span class='hs-comment'>-- * 'dupnames' - Allow duplicate names for subpatterns</span> <a name="line-141"></a><span class='hs-comment'>--</span> <a name="line-142"></a><span class='hs-comment'>-- * 'extended' - Ignore whitespace and # comments</span> <a name="line-143"></a><span class='hs-comment'>--</span> <a name="line-144"></a><span 
class='hs-comment'>-- * 'extra' - PCRE extra features (not much use currently)</span> <a name="line-145"></a><span class='hs-comment'>--</span> <a name="line-146"></a><span class='hs-comment'>-- * 'firstline' - Force matching to be before newline</span> <a name="line-147"></a><span class='hs-comment'>--</span> <a name="line-148"></a><span class='hs-comment'>-- * 'multiline' - '^' and '$' match newlines within data</span> <a name="line-149"></a><span class='hs-comment'>--</span> <a name="line-150"></a><span class='hs-comment'>-- * 'newline_any' - Recognize any Unicode newline sequence</span> <a name="line-151"></a><span class='hs-comment'>--</span> <a name="line-152"></a><span class='hs-comment'>-- * 'newline_anycrlf' - Recognize CR, LF, and CRLF as newline sequences</span> <a name="line-153"></a><span class='hs-comment'>--</span> <a name="line-154"></a><span class='hs-comment'>-- * 'newline_cr' - Set CR as the newline sequence</span> <a name="line-155"></a><span class='hs-comment'>--</span> <a name="line-156"></a><span class='hs-comment'>-- * 'newline_crlf' - Set CRLF as the newline sequence</span> <a name="line-157"></a><span class='hs-comment'>--</span> <a name="line-158"></a><span class='hs-comment'>-- * 'newline_lf' - Set LF as the newline sequence</span> <a name="line-159"></a><span class='hs-comment'>--</span> <a name="line-160"></a><span class='hs-comment'>-- * 'no_auto_capture' - Disable numbered capturing parentheses (named ones available)</span> <a name="line-161"></a><span class='hs-comment'>--</span> <a name="line-162"></a><span class='hs-comment'>-- * 'ungreedy' - Invert greediness of quantifiers</span> <a name="line-163"></a><span class='hs-comment'>--</span> <a name="line-164"></a><span class='hs-comment'>-- * 'utf8' - Run in UTF-8 mode</span> <a name="line-165"></a><span class='hs-comment'>--</span> <a name="line-166"></a><span class='hs-comment'>-- * 'no_utf8_check' - Do not check the pattern for UTF-8 validity</span> <a name="line-167"></a><span 
class='hs-comment'>--</span> <a name="line-168"></a><span class='hs-comment'>-- The regex is allocated via malloc on the C side, and will be</span> <a name="line-169"></a><span class='hs-comment'>-- deallocated by the runtime when the Haskell value representing it</span> <a name="line-170"></a><span class='hs-comment'>-- goes out of scope.</span> <a name="line-171"></a><span class='hs-comment'>--</span> <a name="line-172"></a><span class='hs-comment'>-- See 'man pcreapi' for more details.</span> <a name="line-173"></a><span class='hs-comment'>--</span> <a name="line-174"></a><span class='hs-comment'>-- Caveats: patterns with embedded nulls, such as "\0*" seem to be</span> <a name="line-175"></a><span class='hs-comment'>-- mishandled, as this won't currently match the subject "\0\0\0".</span> <a name="line-176"></a><span class='hs-comment'>--</span> <a name="line-177"></a><span class='hs-definition'>compile</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Regex</span> <a name="line-178"></a><span class='hs-definition'>compile</span> <span class='hs-varid'>s</span> <span class='hs-varid'>o</span> <span class='hs-keyglyph'>=</span> <span class='hs-keyword'>case</span> <span class='hs-varid'>compileM</span> <span class='hs-varid'>s</span> <span class='hs-varid'>o</span> <span class='hs-keyword'>of</span> <a name="line-179"></a> <span class='hs-conid'>Right</span> <span class='hs-varid'>r</span> <span class='hs-keyglyph'>-></span> <span class='hs-varid'>r</span> <a name="line-180"></a> <span class='hs-conid'>Left</span> <span class='hs-varid'>e</span> <span class='hs-keyglyph'>-></span> <span class='hs-varid'>error</span> <span class='hs-layout'>(</span><span
class='hs-str'>"Text.Regex.PCRE.Light: Error in regex: "</span> <span class='hs-varop'>++</span> <span class='hs-varid'>e</span><span class='hs-layout'>)</span> <a name="line-181"></a> <a name="line-182"></a><span class='hs-comment'>------------------------------------------------------------------------</span> <a name="line-183"></a> <a name="line-184"></a><a name="compileM"></a><span class='hs-comment'>-- | 'compileM'</span> <a name="line-185"></a><span class='hs-comment'>-- A safe version of 'compile' with failure wrapped in an Either.</span> <a name="line-186"></a><span class='hs-comment'>--</span> <a name="line-187"></a><span class='hs-comment'>-- Examples,</span> <a name="line-188"></a><span class='hs-comment'>--</span> <a name="line-189"></a><span class='hs-comment'>-- > > compileM ".*" [] :: Either String Regex</span> <a name="line-190"></a><span class='hs-comment'>-- > Right (Regex 0x000000004bb5b980 ".*")</span> <a name="line-191"></a><span class='hs-comment'>--</span> <a name="line-192"></a><span class='hs-comment'>-- > > compileM "*" [] :: Either String Regex</span> <a name="line-193"></a><span class='hs-comment'>-- > Left "nothing to repeat"</span> <a name="line-194"></a><span class='hs-comment'>--</span> <a name="line-195"></a><span class='hs-definition'>compileM</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Either</span> <span class='hs-conid'>String</span> <span class='hs-conid'>Regex</span> <a name="line-196"></a><span class='hs-definition'>compileM</span> <span class='hs-varid'>str</span> <span class='hs-varid'>os</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>$</span> <a 
name="line-197"></a> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>useAsCString</span> <span class='hs-varid'>str</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>pattern</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-198"></a> <span class='hs-varid'>alloca</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>errptr</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-199"></a> <span class='hs-varid'>alloca</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>erroffset</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-200"></a> <span class='hs-varid'>pcre_ptr</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>c_pcre_compile</span> <span class='hs-varid'>pattern</span> <span class='hs-layout'>(</span><span class='hs-varid'>combineOptions</span> <span class='hs-varid'>os</span><span class='hs-layout'>)</span> <span class='hs-varid'>errptr</span> <span class='hs-varid'>erroffset</span> <span class='hs-varid'>nullPtr</span> <a name="line-201"></a> <span class='hs-keyword'>if</span> <span class='hs-varid'>pcre_ptr</span> <span class='hs-varop'>==</span> <span class='hs-varid'>nullPtr</span> <a name="line-202"></a> <span class='hs-keyword'>then</span> <span class='hs-keyword'>do</span> <a name="line-203"></a> <span class='hs-varid'>err</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>peekCString</span> <span class='hs-varop'>=&lt;&lt;</span> <span class='hs-varid'>peek</span> <span class='hs-varid'>errptr</span> <a name="line-204"></a> <span class='hs-varid'>return</span> <span class='hs-layout'>(</span><span class='hs-conid'>Left</span> <span class='hs-varid'>err</span><span class='hs-layout'>)</span> <a name="line-205"></a> <span
class='hs-keyword'>else</span> <span class='hs-keyword'>do</span> <a name="line-206"></a> <span class='hs-varid'>reg</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>newForeignPtr</span> <span class='hs-varid'>finalizerFree</span> <span class='hs-varid'>pcre_ptr</span> <span class='hs-comment'>-- release with free()</span> <a name="line-207"></a> <span class='hs-varid'>return</span> <span class='hs-layout'>(</span><span class='hs-conid'>Right</span> <span class='hs-layout'>(</span><span class='hs-conid'>Regex</span> <span class='hs-varid'>reg</span> <span class='hs-varid'>str</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span> <a name="line-208"></a> <a name="line-209"></a><span class='hs-comment'>-- Possible improvements: an 'IsString' instance could be defined</span> <a name="line-210"></a><span class='hs-comment'>-- for 'Regex', which would allow the compiler to insert calls to</span> <a name="line-211"></a><span class='hs-comment'>-- 'compile' based on the type:</span> <a name="line-212"></a><span class='hs-comment'>--</span> <a name="line-213"></a><span class='hs-comment'>-- The following would be valid:</span> <a name="line-214"></a><span class='hs-comment'>--</span> <a name="line-215"></a><span class='hs-comment'>-- > match "a.*b" "abcdef" []</span> <a name="line-216"></a><span class='hs-comment'>--</span> <a name="line-217"></a><span class='hs-comment'>-- and equivalent to:</span> <a name="line-218"></a><span class='hs-comment'>--</span> <a name="line-219"></a><span class='hs-comment'>-- > match (either error id (compile "a.*b")) "abcdef" []</span> <a name="line-220"></a> <a name="line-221"></a><a name="match"></a><span class='hs-comment'>-- | 'match'</span> <a name="line-222"></a><span class='hs-comment'>--</span> <a name="line-223"></a><span class='hs-comment'>-- Matches a compiled regular expression against a given subject string,</span> <a name="line-224"></a><span class='hs-comment'>-- using a matching algorithm that
is similar to Perl's. If the subject</span> <a name="line-225"></a><span class='hs-comment'>-- string doesn't match the regular expression, 'Nothing' is returned,</span> <a name="line-226"></a><span class='hs-comment'>-- otherwise the portion of the string that matched is returned, along</span> <a name="line-227"></a><span class='hs-comment'>-- with any captured subpatterns.</span> <a name="line-228"></a><span class='hs-comment'>--</span> <a name="line-229"></a><span class='hs-comment'>-- The arguments are:</span> <a name="line-230"></a><span class='hs-comment'>--</span> <a name="line-231"></a><span class='hs-comment'>-- * 'regex', a PCRE regular expression value produced by compile</span> <a name="line-232"></a><span class='hs-comment'>--</span> <a name="line-233"></a><span class='hs-comment'>-- * 'subject', the subject string to match against</span> <a name="line-234"></a><span class='hs-comment'>--</span> <a name="line-235"></a><span class='hs-comment'>-- * 'options', an optional set of exec-time flags to exec.</span> <a name="line-236"></a><span class='hs-comment'>--</span> <a name="line-237"></a><span class='hs-comment'>-- Available runtime options are:</span> <a name="line-238"></a><span class='hs-comment'>--</span> <a name="line-239"></a><span class='hs-comment'>-- * 'exec_anchored' - Match only at the first position</span> <a name="line-240"></a><span class='hs-comment'>--</span> <a name="line-241"></a><span class='hs-comment'>-- * 'exec_newline_any' - Recognize any Unicode newline sequence</span> <a name="line-242"></a><span class='hs-comment'>--</span> <a name="line-243"></a><span class='hs-comment'>-- * 'exec_newline_anycrlf' - Recognize CR, LF, and CRLF as newline sequences</span> <a name="line-244"></a><span class='hs-comment'>--</span> <a name="line-245"></a><span class='hs-comment'>-- * 'exec_newline_cr' - Set CR as the newline sequence</span> <a name="line-246"></a><span class='hs-comment'>--</span> <a name="line-247"></a><span class='hs-comment'>-- 
* 'exec_newline_crlf' - Set CRLF as the newline sequence</span> <a name="line-248"></a><span class='hs-comment'>--</span> <a name="line-249"></a><span class='hs-comment'>-- * 'exec_newline_lf' - Set LF as the newline sequence</span> <a name="line-250"></a><span class='hs-comment'>--</span> <a name="line-251"></a><span class='hs-comment'>-- * 'exec_notbol' - Subject is not the beginning of a line</span> <a name="line-252"></a><span class='hs-comment'>--</span> <a name="line-253"></a><span class='hs-comment'>-- * 'exec_noteol' - Subject is not the end of a line</span> <a name="line-254"></a><span class='hs-comment'>--</span> <a name="line-255"></a><span class='hs-comment'>-- * 'exec_notempty' - An empty string is not a valid match</span> <a name="line-256"></a><span class='hs-comment'>--</span> <a name="line-257"></a><span class='hs-comment'>-- * 'exec_no_utf8_check' - Do not check the subject for UTF-8</span> <a name="line-258"></a><span class='hs-comment'>--</span> <a name="line-259"></a><span class='hs-comment'>-- * 'exec_partial' - Return PCRE_ERROR_PARTIAL for a partial match</span> <a name="line-260"></a><span class='hs-comment'>--</span> <a name="line-261"></a><span class='hs-comment'>-- The result value, and any captured subpatterns, are returned.</span> <a name="line-262"></a><span class='hs-comment'>-- If the regex is invalid, or the subject string is empty, Nothing</span> <a name="line-263"></a><span class='hs-comment'>-- is returned.</span> <a name="line-264"></a><span class='hs-comment'>--</span> <a name="line-265"></a><span class='hs-definition'>match</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Regex</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>PCREExecOption</span><span class='hs-keyglyph'>]</span> <span class='hs-keyglyph'>-></span> <span 
class='hs-conid'>Maybe</span> <span class='hs-keyglyph'>[</span><span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span><span class='hs-keyglyph'>]</span> <a name="line-266"></a><span class='hs-definition'>match</span> <span class='hs-layout'>(</span><span class='hs-conid'>Regex</span> <span class='hs-varid'>pcre_fp</span> <span class='hs-keyword'>_</span><span class='hs-layout'>)</span> <span class='hs-varid'>subject</span> <span class='hs-varid'>os</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>$</span> <span class='hs-keyword'>do</span> <a name="line-267"></a> <span class='hs-varid'>withForeignPtr</span> <span class='hs-varid'>pcre_fp</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>pcre_ptr</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-268"></a> <span class='hs-varid'>n_capt</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>captureCount'</span> <span class='hs-varid'>pcre_ptr</span> <a name="line-269"></a> <a name="line-270"></a> <span class='hs-comment'>-- The smallest size for ovector that will allow for n captured</span> <a name="line-271"></a> <span class='hs-comment'>-- substrings, in addition to the offsets of the substring</span> <a name="line-272"></a> <span class='hs-comment'>-- matched by the whole pattern, is (n+1)*3.
(man pcreapi)</span> <a name="line-273"></a> <a name="line-274"></a> <span class='hs-keyword'>let</span> <span class='hs-varid'>ovec_size</span> <span class='hs-keyglyph'>=</span> <span class='hs-layout'>(</span><span class='hs-varid'>n_capt</span> <span class='hs-varop'>+</span> <span class='hs-num'>1</span><span class='hs-layout'>)</span> <span class='hs-varop'>*</span> <span class='hs-num'>3</span> <a name="line-275"></a> <span class='hs-varid'>ovec_bytes</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>ovec_size</span> <span class='hs-varop'>*</span> <span class='hs-varid'>size_of_cint</span> <a name="line-276"></a> <a name="line-277"></a> <span class='hs-varid'>allocaBytes</span> <span class='hs-varid'>ovec_bytes</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>ovec</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-278"></a> <a name="line-279"></a> <span class='hs-keyword'>let</span> <span class='hs-layout'>(</span><span class='hs-varid'>str_fp</span><span class='hs-layout'>,</span> <span class='hs-varid'>off</span><span class='hs-layout'>,</span> <span class='hs-varid'>len</span><span class='hs-layout'>)</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>toForeignPtr</span> <span class='hs-varid'>subject</span> <a name="line-280"></a> <span class='hs-varid'>withForeignPtr</span> <span class='hs-varid'>str_fp</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>cstr</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-281"></a> <span class='hs-varid'>r</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>c_pcre_exec</span> <a name="line-282"></a> <span class='hs-varid'>pcre_ptr</span> <a name="line-283"></a> <span class='hs-varid'>nullPtr</span> <a name="line-284"></a> <span
class='hs-layout'>(</span><span class='hs-varid'>cstr</span> <span class='hs-varop'>`plusPtr`</span> <span class='hs-varid'>off</span><span class='hs-layout'>)</span> <span class='hs-comment'>-- may contain binary zero bytes.</span> <a name="line-285"></a> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>len</span><span class='hs-layout'>)</span> <a name="line-286"></a> <span class='hs-num'>0</span> <a name="line-287"></a> <span class='hs-layout'>(</span><span class='hs-varid'>combineExecOptions</span> <span class='hs-varid'>os</span><span class='hs-layout'>)</span> <a name="line-288"></a> <span class='hs-varid'>ovec</span> <a name="line-289"></a> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>ovec_size</span><span class='hs-layout'>)</span> <a name="line-290"></a> <a name="line-291"></a> <span class='hs-keyword'>if</span> <span class='hs-varid'>r</span> <span class='hs-varop'>&lt;</span> <span class='hs-num'>0</span> <span class='hs-comment'>-- errors, or error_no_match</span> <a name="line-292"></a> <span class='hs-keyword'>then</span> <span class='hs-varid'>return</span> <span class='hs-conid'>Nothing</span> <a name="line-293"></a> <span class='hs-keyword'>else</span> <span class='hs-keyword'>let</span> <span class='hs-varid'>loop</span> <span class='hs-varid'>n</span> <span class='hs-varid'>o</span> <span class='hs-varid'>acc</span> <span class='hs-keyglyph'>=</span> <a name="line-294"></a> <span class='hs-keyword'>if</span> <span class='hs-varid'>n</span> <span class='hs-varop'>==</span> <span class='hs-varid'>r</span> <a name="line-295"></a> <span class='hs-keyword'>then</span> <span class='hs-varid'>return</span> <span class='hs-layout'>(</span><span class='hs-conid'>Just</span> <span class='hs-layout'>(</span><span class='hs-varid'>reverse</span> <span class='hs-varid'>acc</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span> <a
name="line-296"></a> <span class='hs-keyword'>else</span> <span class='hs-keyword'>do</span> <a name="line-297"></a> <span class='hs-varid'>i</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>peekElemOff</span> <span class='hs-varid'>ovec</span> <span class='hs-varop'>$!</span> <span class='hs-varid'>o</span> <a name="line-298"></a> <span class='hs-varid'>j</span> <span class='hs-keyglyph'>&lt;-</span> <span class='hs-varid'>peekElemOff</span> <span class='hs-varid'>ovec</span> <span class='hs-layout'>(</span><span class='hs-varid'>o</span><span class='hs-varop'>+</span><span class='hs-num'>1</span><span class='hs-layout'>)</span> <a name="line-299"></a> <span class='hs-keyword'>let</span> <span class='hs-varid'>s</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>substring</span> <span class='hs-varid'>i</span> <span class='hs-varid'>j</span> <span class='hs-varid'>subject</span> <a name="line-300"></a> <span class='hs-varid'>s</span> <span class='hs-varop'>`seq`</span> <span class='hs-varid'>loop</span> <span class='hs-layout'>(</span><span class='hs-varid'>n</span><span class='hs-varop'>+</span><span class='hs-num'>1</span><span class='hs-layout'>)</span> <span class='hs-layout'>(</span><span class='hs-varid'>o</span><span class='hs-varop'>+</span><span class='hs-num'>2</span><span class='hs-layout'>)</span> <span class='hs-layout'>(</span><span class='hs-varid'>s</span> <span class='hs-conop'>:</span> <span class='hs-varid'>acc</span><span class='hs-layout'>)</span> <a name="line-301"></a> <span class='hs-keyword'>in</span> <span class='hs-varid'>loop</span> <span class='hs-num'>0</span> <span class='hs-num'>0</span> <span class='hs-conid'>[]</span> <a name="line-302"></a> <a name="line-303"></a> <span class='hs-comment'>-- The first two-thirds of ovec is used to pass back captured</span> <a name="line-304"></a> <span class='hs-comment'>-- substrings. When a match is successful, information about captured</span> <a name="line-305"></a> 
<span class='hs-comment'>-- substrings is returned in pairs of integers, starting at the</span> <a name="line-306"></a> <span class='hs-comment'>-- beginning of ovector, and continuing up to two-thirds of its length at</span> <a name="line-307"></a> <span class='hs-comment'>-- the most. The first pair, ovector[0] and ovector[1], identify the</span> <a name="line-308"></a> <span class='hs-comment'>-- portion of the subject string matched by the entire pattern. The next</span> <a name="line-309"></a> <span class='hs-comment'>-- pair is used for the first capturing subpattern, and so on. The</span> <a name="line-310"></a> <span class='hs-comment'>-- value returned by pcre_exec() is one more than the highest numbered</span> <a name="line-311"></a> <span class='hs-comment'>-- pair that has been set. For example, if two substrings have been</span> <a name="line-312"></a> <span class='hs-comment'>-- captured, the returned value is 3. </span> <a name="line-313"></a> <a name="line-314"></a> <span class='hs-keyword'>where</span> <a name="line-315"></a> <span class='hs-comment'>-- The first element of a pair is set to the offset of the first</span> <a name="line-316"></a> <span class='hs-comment'>-- character in a substring, and the second is set to the offset of the</span> <a name="line-317"></a> <span class='hs-comment'>-- first character after the end of a substring.</span> <a name="line-318"></a> <span class='hs-varid'>substring</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>CInt</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>CInt</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-conid'>ByteString</span> <a name="line-319"></a> <span class='hs-varid'>substring</span> <span class='hs-varid'>x</span> <span 
class='hs-varid'>y</span> <span class='hs-keyword'>_</span> <span class='hs-keyglyph'>|</span> <span class='hs-varid'>x</span> <span class='hs-varop'>==</span> <span class='hs-varid'>y</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>empty</span> <span class='hs-comment'>-- XXX an unset subpattern</span> <a name="line-320"></a> <span class='hs-varid'>substring</span> <span class='hs-varid'>a</span> <span class='hs-varid'>b</span> <span class='hs-varid'>s</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>end</span> <span class='hs-comment'>-- note that we're not checking...</span> <a name="line-321"></a> <span class='hs-keyword'>where</span> <a name="line-322"></a> <span class='hs-varid'>start</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>unsafeDrop</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-varid'>a</span><span class='hs-layout'>)</span> <span class='hs-varid'>s</span> <a name="line-323"></a> <span class='hs-varid'>end</span> <span class='hs-keyglyph'>=</span> <span class='hs-conid'>S</span><span class='hs-varop'>.</span><span class='hs-varid'>unsafeTake</span> <span class='hs-layout'>(</span><span class='hs-varid'>fromIntegral</span> <span class='hs-layout'>(</span><span class='hs-varid'>b</span><span class='hs-varop'>-</span><span class='hs-varid'>a</span><span class='hs-layout'>)</span><span class='hs-layout'>)</span> <span class='hs-varid'>start</span> <a name="line-324"></a> <a name="line-325"></a> <a name="line-326"></a><a name="captureCount"></a><span class='hs-definition'>captureCount</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Regex</span> <span class='hs-keyglyph'>-></span> <span class='hs-conid'>Int</span> <a name="line-327"></a><span class='hs-definition'>captureCount</span> <span 
class='hs-layout'>(</span><span class='hs-conid'>Regex</span> <span class='hs-varid'>pcre_fp</span> <span class='hs-keyword'>_</span><span class='hs-layout'>)</span> <span class='hs-keyglyph'>=</span> <span class='hs-varid'>unsafePerformIO</span> <span class='hs-varop'>$</span> <span class='hs-keyword'>do</span> <a name="line-328"></a> <span class='hs-varid'>withForeignPtr</span> <span class='hs-varid'>pcre_fp</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>pcre_ptr</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <a name="line-329"></a> <span class='hs-varid'>captureCount'</span> <span class='hs-varid'>pcre_ptr</span> <a name="line-330"></a> <a name="line-331"></a><a name="captureCount'"></a><span class='hs-definition'>captureCount'</span> <span class='hs-varid'>pcre_fp</span> <span class='hs-keyglyph'>=</span> <a name="line-332"></a> <span class='hs-varid'>alloca</span> <span class='hs-varop'>$</span> <span class='hs-keyglyph'>\</span><span class='hs-varid'>n_ptr</span> <span class='hs-keyglyph'>-></span> <span class='hs-keyword'>do</span> <span class='hs-comment'>-- (st :: Ptr CInt)</span> <a name="line-333"></a> <span class='hs-varid'>c_pcre_fullinfo</span> <span class='hs-varid'>pcre_fp</span> <span class='hs-varid'>nullPtr</span> <span class='hs-varid'>info_capturecount</span> <span class='hs-varid'>n_ptr</span> <a name="line-334"></a> <span class='hs-varid'>return</span> <span class='hs-varop'>.</span> <span class='hs-varid'>fromIntegral</span> <span class='hs-varop'>=&lt;&lt;</span> <span class='hs-varid'>peek</span> <span class='hs-layout'>(</span><span class='hs-varid'>n_ptr</span> <span class='hs-keyglyph'>::</span> <span class='hs-conid'>Ptr</span> <span class='hs-conid'>CInt</span><span class='hs-layout'>)</span> </pre></body> </html>