(* $Id: test_canonxml.ml 662 2004-05-25 20:57:28Z gerd $ * ---------------------------------------------------------------------- * *) open Pxp_document;; open Pxp_yacc;; open Pxp_types;; open Pxp_lexer_types;; let error_happened = ref false;; let rec prerr_error e = prerr_endline (string_of_exn e) ;; class warner = object method warn w = prerr_endline ("WARNING: " ^ w) end ;; let outbuf = String.create 8192;; let output_utf8 config s = match config.encoding with `Enc_utf8 -> print_string s | `Enc_iso88591 -> for i = 0 to String.length s - 1 do let c = Char.code(s.[i]) in if c <= 127 then print_char(Char.chr(c)) else begin print_char(Char.chr(0xc0 lor (c lsr 6))); print_char(Char.chr(0x80 lor (c land 0x3f))); end done | _ -> assert false ;; let re = Str.regexp "[&<>\"\009\010\013]";; let escaped s = Str.global_substitute re (fun _ -> match Str.matched_string s with "&" -> "&" | "<" -> "<" | ">" -> ">" | "\"" -> """ | "\009" -> "	" | "\010" -> " " | "\013" -> " " | _ -> assert false ) s ;; let rec output_xml config n = match n # node_type with T_super_root -> n # iter_nodes (output_xml config) | T_pinstr pi_name -> let [ pi ] = n # pinstr pi_name in output_utf8 config "<?"; output_utf8 config (pi # target); output_utf8 config " "; output_utf8 config (pi # value); output_utf8 config "?>"; | T_element name -> output_utf8 config "<"; output_utf8 config name; let sorted_attnames = Sort.list ( <= ) (n # attribute_names) in List.iter (fun attname -> match n # attribute attname with Value v -> output_utf8 config " "; output_utf8 config attname; output_utf8 config "=\""; output_utf8 config (escaped v); output_utf8 config "\""; | Valuelist vl -> let v = String.concat " " vl in output_utf8 config " "; output_utf8 config attname; output_utf8 config "=\""; output_utf8 config (escaped v); output_utf8 config "\""; | Implied_value -> () ) sorted_attnames; output_utf8 config ">"; n # iter_nodes (output_xml config); output_utf8 config "</"; output_utf8 config name; output_utf8 config ">"; | T_data -> let v = n # data in output_utf8 config (escaped v) | T_comment -> let v = match n # comment with None -> assert false | Some x -> x in output_utf8 config ("<!--" ^ v ^ "-->") | _ -> assert false ;; let parse debug wf iso88591 comments eb_atts filename = let spec = let e = new element_impl default_extension in make_spec_from_mapping ~super_root_exemplar: (new super_root_impl default_extension) ~default_pinstr_exemplar: (new pinstr_impl default_extension) ~comment_exemplar: (new comment_impl default_extension) ~data_exemplar: (new data_impl default_extension) ~default_element_exemplar: e ~element_mapping: (Hashtbl.create 1) () in let escape_atts tok pos mng = match tok with Lcurly -> "{" | LLcurly -> "{{" | Rcurly -> "}" | RRcurly -> "}}" | _ -> assert false in let config = { default_config with warner = new warner; debugging_mode = debug; enable_pinstr_nodes = true; enable_super_root_node = true; enable_comment_nodes = comments; drop_ignorable_whitespace = false; encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8; idref_pass = true; escape_attributes = if eb_atts then Some escape_atts else None; } in try let parse_fn = if wf then parse_wfdocument_entity config else let index = new hash_index in parse_document_entity ~id_index:(index :> 'ext index) config in let tree = parse_fn (from_file filename) spec in output_xml config (tree # root) with e -> error_happened := true; prerr_error e ;; let main() = let debug = ref false in let wf = ref false in let iso88591 = ref false in let comments = ref false in let eb_atts = ref false in let files = ref [] in Arg.parse [ "-d", Arg.Set debug, " turn debugging mode on"; "-wf", Arg.Set wf, " check only on well-formedness"; "-iso-8859-1", Arg.Set iso88591, " use ISO-8859-1 as internal encoding instead of UTF-8"; "-comments", Arg.Set comments, " output comments, too"; "-event-based-atts", Arg.Set eb_atts, " use the event-based attribute parsing algorithm"; ] (fun x -> files := x :: !files) " usage: test_canonxml [options] file ... List of options:"; files := List.rev !files; List.iter (parse !debug !wf !iso88591 !comments !eb_atts) !files; ;; main(); if !error_happened then exit(1);;