Sophie

Sophie

distrib > Mageia > 4 > x86_64 > by-pkgid > 9a8dd268eda1b24202127d346e5c3fb8 > files > 71

ocaml-re-devel-1.2.1-3.mga4.x86_64.rpm

open Re

let re_match ?pos ?len r s res =
  expect_equal_app
    ~msg:(str_printer s)
    ~printer:arr_ofs_printer
    id res
    (fun () -> get_all_ofs (exec ?pos ?len (compile r) s)) ()
;;

let re_fail ?pos ?len r s =
  expect_equal_app
    ~msg:(str_printer s)
    ~printer:arr_ofs_printer
    not_found ()
    (fun () -> get_all_ofs (exec ?pos ?len (compile r) s)) ()
;;

(* Substring Extraction *)

let _ = 
  let r =
     seq [group (char 'a'); 
          opt   (group (char 'a')); 
          group (char 'b')]
  in
  let m = exec (compile r) "ab" in
  
  expect_pass "get" (fun () ->
    expect_eq_str id        "ab" (get m) 0;
    expect_eq_str id        "a"  (get m) 1;
    expect_eq_str not_found ()   (get m) 2;
    expect_eq_str id        "b"  (get m) 3;
    expect_eq_str not_found ()   (get m) 4;
  );

  expect_pass "get_ofs" (fun () ->
    expect_eq_ofs id        (0,2) (get_ofs m) 0;
    expect_eq_ofs id        (0,1) (get_ofs m) 1;
    expect_eq_ofs not_found ()    (get_ofs m) 2;
    expect_eq_ofs id        (1,2) (get_ofs m) 3;
    expect_eq_ofs not_found ()    (get_ofs m) 4;
  );

  expect_pass "get_all" (fun () ->
    expect_eq_arr_str
      id [|"ab";"a";"";"b"|]
      get_all m
  );

  expect_pass "get_all_ofs" (fun () ->
    expect_eq_arr_ofs
      id [|(0,2);(0,1);(-1,-1);(1,2)|]
      get_all_ofs m
  );

  expect_pass "test" (fun () ->
    expect_eq_bool id true (test m) 0;
    expect_eq_bool id true (test m) 1;
    expect_eq_bool id false (test m) 2;
    expect_eq_bool id true (test m) 3;
    expect_eq_bool id false (test m) 4;
  );
;;

(* Literal Match *)

expect_pass "str" (fun () ->
  re_match  (str "a")                   "a"     [|(0,1)|];
  re_fail   (str "a")                   "b";
);

expect_pass "char" (fun () ->
  re_match  (char 'a')                  "a"     [|(0,1)|];
  re_fail   (char 'a')                  "b";
);

(* Basic Operations *)

expect_pass "alt" (fun () ->
  re_match  (alt [char 'a'; char 'b'])  "a"     [|(0,1)|];
  re_match  (alt [char 'a'; char 'b'])  "b"     [|(0,1)|];
  re_fail   (alt [char 'a'; char 'b'])  "c";
);

expect_pass "seq" (fun () ->
  re_match  (seq [char 'a'; char 'b'])  "ab"    [|(0,2)|];
  re_fail   (seq [char 'a'; char 'b'])  "ac";
);

expect_pass "empty" (fun () ->
  re_fail   (empty)                     "";
  re_fail   (empty)                     "a";
);

expect_pass "epsilon" (fun () ->
  re_match  (epsilon)                   ""      [|(0,0)|];
  re_match  (epsilon)                   "a"     [|(0,0)|];
);

expect_pass "rep" (fun () ->
  re_match  (rep (char 'a'))            ""      [|(0,0)|];
  re_match  (rep (char 'a'))            "a"     [|(0,1)|];
  re_match  (rep (char 'a'))            "aa"    [|(0,2)|];
  re_match  (rep (char 'a'))            "b"     [|(0,0)|];
);

expect_pass "rep1" (fun () ->
  re_match  (rep1 (char 'a'))           "a"     [|(0,1)|];
  re_match  (rep1 (char 'a'))           "aa"    [|(0,2)|];
  re_fail   (rep1 (char 'a'))           "";
  re_fail   (rep1 (char 'a'))           "b";
);

expect_pass "repn" (fun () ->
  re_match  (repn (char 'a') 0 None)            ""      [|(0,0)|];
  re_match  (repn (char 'a') 0 (Some 0))        ""      [|(0,0)|];
  re_match  (repn (char 'a') 1 (Some 2))        "a"     [|(0,1)|];
  re_match  (repn (char 'a') 1 (Some 2))        "aa"    [|(0,2)|];
  re_fail   (repn (char 'a') 1 (Some 2))        "";
  re_match  (repn (char 'a') 1 (Some 2))        "aaa"   [|(0,2)|];

  expect_equal_app
    invalid_arg "Re.repn"
    (fun () -> repn empty (-1) None)  ();

  expect_equal_app
    invalid_arg "Re.repn"
    (fun () -> repn empty 1 (Some 0)) ();
);

expect_pass "opt" (fun () ->
  re_match  (opt (char 'a'))            ""      [|(0,0)|];
  re_match  (opt (char 'a'))            "a"     [|(0,1)|];
);

(* String, line, word *)

expect_pass "bol" (fun () ->
  re_match  (seq [bol; char 'a'])       "ab"    [|(0,1)|];
  re_match  (seq [bol; char 'a'])       "b\na"  [|(2,3)|];
  re_fail   (seq [bol; char 'a'])       "ba";
);

expect_pass "eol" (fun () ->
  re_match  (seq [char 'a'; eol])       "ba"    [|(1,2)|];
  re_match  (seq [char 'a'; eol])       "a\nb"  [|(0,1)|];
  re_match  (seq [char 'a'; eol])       "ba\n"  [|(1,2)|];
  re_fail   (seq [char 'a'; eol])       "ab";
);

expect_pass "bow" (fun () ->
  re_match  (seq [bow; char 'a'])       "a"     [|(0,1)|];
  re_match  (seq [bow; char 'a'])       "bb aa" [|(3,4)|];
  re_fail   (seq [bow; char 'a'])       "ba ba";
);

expect_pass "eow" (fun () ->
  re_match  (seq [char 'a'; eow])       "a"     [|(0,1)|];
  re_match  (seq [char 'a'; eow])       "bb aa" [|(4,5)|];
  re_fail   (seq [char 'a'; eow])       "ab ab";
);

expect_pass "bos" (fun () ->
  re_match  (seq [bos; char 'a'])       "ab"    [|(0,1)|];
  re_fail   (seq [bos; char 'a'])       "b\na";
  re_fail   (seq [bos; char 'a'])       "ba";
);

expect_pass "eos" (fun () ->
  re_match  (seq [char 'a'; eos])       "ba"    [|(1,2)|];
  re_fail   (seq [char 'a'; eos])       "a\nb";
  re_fail   (seq [char 'a'; eos])       "ba\n";
  re_fail   (seq [char 'a'; eos])       "ab";
);

expect_pass "leol" (fun () ->
  re_match  (seq [char 'a'; leol])      "ba"    [|(1,2)|];
  re_fail   (seq [char 'a'; leol])      "a\nb";
  re_match  (seq [char 'a'; leol])      "ba\n"  [|(1,2)|];
  re_fail   (seq [char 'a'; leol])      "ab";
  re_match  (alt [str "b\n"; seq [char 'a'; leol]])  "ab\n"    [|(1,3)|];
);

expect_pass "start" (fun () ->
  re_match ~pos:1 (seq [start; char 'a'])      "xab"    [|(1,2)|];
  re_fail  ~pos:1 (seq [start; char 'a'])      "xb\na";
  re_fail  ~pos:1 (seq [start; char 'a'])      "xba";
);

expect_pass "stop" (fun () ->
  re_match ~len:2 (seq [char 'a'; stop])       "bax"    [|(1,2)|];
  re_fail  ~len:3 (seq [char 'a'; stop])       "a\nbx";
  re_fail  ~len:3 (seq [char 'a'; stop])       "ba\nx";
  re_fail  ~len:2 (seq [char 'a'; stop])       "abx";
);

expect_pass "word" (fun () ->
  re_match  (word (str "aa"))           "aa"    [|(0,2)|];
  re_match  (word (str "aa"))           "bb aa" [|(3,5)|];
  re_fail   (word (str "aa"))           "aaa";
);

expect_pass "not_boundary" (fun () ->
  re_match (seq [not_boundary; char 'b'; not_boundary])  "abc"  [|(1,2)|];
  re_fail  (seq [not_boundary; char 'a'])  "abc";
  re_fail  (seq [char 'c'; not_boundary])  "abc";
);

(* Match semantics *)

expect_pass "default match semantics" (fun () ->
  re_match
    (seq [(rep (alt [char 'a'; char 'b'])); char 'b'])
    "aabaab"
    [|(0,6)|];
  re_match
    (alt [str "aa"; str "aaa"])
    "aaaa"
    [|(0, 2)|];
  re_match
    (alt [str "aaa"; str "aa"])
    "aaaa"
    [|(0, 3)|];
);

expect_pass "shortest match" (fun () ->
  re_match
    (shortest (seq [(rep (alt [char 'a'; char 'b'])); char 'b']))
    "aabaab"
    [|(0,3)|];
  re_match
    (shortest (alt [str "aa"; str "aaa"]))
    "aaaa"
    [|(0, 2)|];
  re_match
    (shortest (alt [str "aaa"; str "aa"]))
    "aaaa"
    [|(0, 2)|];
);

expect_pass "longest match" (fun () ->
  re_match
    (longest (seq [(rep (alt [char 'a'; char 'b'])); char 'b']))
    "aabaab"
    [|(0,6)|];
  re_match
    (longest (alt [str "aa"; str "aaa"]))
    "aaaa"
    [|(0, 3)|];
  re_match
    (longest (alt [str "aaa"; str "aa"]))
    "aaaa"
    [|(0, 3)|];
);

expect_pass "first match" (fun () ->
  re_match
    (first (seq [(rep (alt [char 'a'; char 'b'])); char 'b']))
    "aabaab"
    [|(0,6)|];
  re_match
    (first (alt [str "aa"; str "aaa"]))
    "aaaa"
    [|(0, 2)|];
  re_match
    (first (alt [str "aaa"; str "aa"]))
    "aaaa"
    [|(0, 3)|];
);

expect_pass "greedy" (fun () ->
  re_match
    (greedy (seq [(rep (alt [char 'a'; char 'b'])); char 'b']))
    "aabaab"
    [|(0,6)|];
  re_match
    (greedy (rep (group (opt (char 'a')))))
    "aa"
    [|(0,2); (2,2)|];
);

expect_pass "non_greedy" (fun () ->
  re_match
    (non_greedy (longest (seq [(rep (alt [char 'a'; char 'b'])); char 'b'])))
    "aabaab"
    [|(0,6)|];
  re_match
    (non_greedy (first (seq [(rep (alt [char 'a'; char 'b'])); char 'b'])))
    "aabaab"
    [|(0,3)|];
  re_match
    (non_greedy (longest (rep (group (opt (char 'a'))))))
    "aa"
    [|(0,2); (1,2)|];
);

expect_pass "match semantics" (fun () ->
  let r = rep (group (alt [str "aaa"; str "aa"])) in
  re_match (longest r)            "aaaaaaa" [|(0,7); (5, 7)|];
  re_match (first r)              "aaaaaaa" [|(0,6); (3, 6)|];
  re_match (first (non_greedy r)) "aaaaaaa" [|(0,0); (-1, -1)|];
  re_match (shortest r)           "aaaaaaa" [|(0,0); (-1, -1)|];
  let r' = rep (group (shortest (alt [str "aaa"; str "aa"]))) in
  re_match (longest r')           "aaaaaaa" [|(0,7); (4, 7)|];
  re_match (first r')             "aaaaaaa" [|(0,6); (4, 6)|];
);

(* Group (or submatch) *)

expect_pass "group" (fun () ->
  let r =
     seq [group (char 'a'); 
          opt   (group (char 'a')); 
          group (char 'b')]
  in
  expect_eq_arr_ofs
    id [|(0,2);(0,1);(-1,-1);(1,2)|]
    (fun () -> get_all_ofs (exec (compile r) "ab")) ()
);

expect_pass "no_group" (fun () ->
  let r =
     no_group (
       seq [group (char 'a'); 
            opt   (group (char 'a')); 
            group (char 'b')]
     )
  in
  expect_eq_arr_ofs
    id [|(0,2)|]
    (fun () -> get_all_ofs (exec (compile r) "ab")) ()
);

expect_pass "nest" (fun () ->
  let r =
    rep (nest (alt [group (char 'a'); char 'b']))
  in
  re_match r "ab" [|(0,2); (-1, -1)|];
  re_match r "ba" [|(0,2); (1, 2)|];
);

(* Character set *)

expect_pass "set" (fun () ->
  re_match (rep1 (set "abcd")) "bcbadbabcdba" [|(0,12)|];
  re_fail  (set "abcd") "e";
);

expect_pass "rg" (fun () ->
  re_match (rep1 (rg '0' '9')) "0123456789" [|(0,10)|];
  re_fail  (rep1 (rg '0' '9')) "a";
);

expect_pass "inter" (fun () ->
  re_match (rep1 (inter [rg '0' '9'; rg '4' '6']))  "456"   [|(0,3)|];
  re_fail  (rep1 (inter [rg '0' '9'; rg '4' '6']))  "7";
  re_match (inter [alt [char 'a'; char 'b']; char 'b']) "b" [|(0,1)|];
);

expect_pass "diff" (fun () ->
  re_match (rep1 (diff (rg '0' '9') (rg '4' '6')))  "0123789"   [|(0,7)|];
  re_fail  (rep1 (diff (rg '0' '9') (rg '4' '6')))  "4";
);

expect_pass "compl" (fun () ->
  re_match (rep1 (compl [rg '0' '9'; rg 'a' 'z'])) "A:Z+" [|(0,4)|];
  re_fail  (rep1 (compl [rg '0' '9'; rg 'a' 'z'])) "0";
  re_fail  (rep1 (compl [rg '0' '9'; rg 'a' 'z'])) "a";
);

(* Predefined character sets - should these be tested exhaustively? *)

(* Case modifiers *)

expect_pass "case" (fun () ->
  re_match (case (str "abc"))           "abc" [|(0,3)|];
  re_match (no_case (case (str "abc"))) "abc" [|(0,3)|];
  re_fail  (case (str "abc"))           "ABC";
  re_fail  (no_case (case (str "abc"))) "ABC";
);

expect_pass "no_case" (fun () ->
  re_match (no_case (str "abc"))        "abc" [|(0,3)|];
  re_match (no_case (str "abc"))        "ABC" [|(0,3)|];
  re_match (case (no_case (str "abc"))) "abc" [|(0,3)|];
  re_match (case (no_case (str "abc"))) "ABC" [|(0,3)|];
);

(* Fixed bugs *)

expect_pass "bugs" (fun () ->
  try
    ignore (Re.compile (Re_perl.re "(.*?)(\\WPl|\\Bpl)(.*)"))
  with _ ->
    fail "bug in Re.handle_case"
);