Sophie

Sophie

distrib > Fedora > 18 > x86_64 > by-pkgid > 5f2aa7a9df0831bf9db9ab38e75779bb > files > 16

erlang-neotoma-1.6.0-1.fc18.x86_64.rpm

-module(csv).
-export([parse/1,file/1]).
-compile(nowarn_unused_vars).
-compile({nowarn_unused_function,[p/4, p/5, p_eof/0, p_optional/1, p_not/1, p_assert/1, p_seq/1, p_and/1, p_choose/1, p_zero_or_more/1, p_one_or_more/1, p_label/2, p_string/1, p_anything/0, p_charclass/1, line/1, column/1]}).



file(Filename) -> {ok, Bin} = file:read_file(Filename), parse(Bin).

parse(List) when is_list(List) -> parse(list_to_binary(List));
parse(Input) when is_binary(Input) ->
    setup_memo(),
    Result = case 'rows'(Input,{{line,1},{column,1}}) of
                 {AST, <<>>, _Index} -> AST;
                 Any -> Any
             end,
    release_memo(), Result.

'rows'(Input, Index) ->
    p(Input, Index, 'rows', fun(I,D) -> (p_choose([p_seq([p_label('head', fun 'row'/2), p_label('tail', p_zero_or_more(p_seq([fun 'crlf'/2, fun 'row'/2])))]), p_string(<<>>)]))(I,D) end, fun(Node, Idx) ->
                                                                                                                                                                                                   case Node of
                                                                                                                                                                                                       [] -> [];
                                                                                                                                                                                                       [""] -> [];
                                                                                                                                                                                                       _ ->
                                                                                                                                                                                                           Head = proplists:get_value(head, Node),
                                                                                                                                                                                                           Tail = [R || [_,R] <- proplists:get_value(tail, Node)],
                                                                                                                                                                                                           [Head|Tail]
                                                                                                                                                                                                   end
                                                                                                                                                                                           end).

'row'(Input, Index) ->
    p(Input, Index, 'row', fun(I,D) -> (p_choose([p_seq([p_label('head', fun 'field'/2), p_label('tail', p_zero_or_more(p_seq([fun 'field_sep'/2, fun 'field'/2])))]), p_string(<<>>)]))(I,D) end, fun(Node, Idx) ->
                                                                                                                                                                                                           case Node of
                                                                                                                                                                                                               [] -> [];
                                                                                                                                                                                                               [""] -> [];
                                                                                                                                                                                                               _ ->
                                                                                                                                                                                                                   Head = proplists:get_value(head, Node),
                                                                                                                                                                                                                   Tail = [F || [_,F] <- proplists:get_value(tail, Node)],
                                                                                                                                                                                                                   [Head|Tail]
                                                                                                                                                                                                           end
                                                                                                                                                                                                   end).

'field'(Input, Index) ->
    p(Input, Index, 'field', fun(I,D) -> (p_choose([fun 'quoted_field'/2, p_zero_or_more(p_seq([p_not(fun 'field_sep'/2), p_not(fun 'crlf'/2), p_anything()]))]))(I,D) end, fun(Node, Idx) -> iolist_to_binary(Node) end).

'quoted_field'(Input, Index) ->
    p(Input, Index, 'quoted_field', fun(I,D) -> (p_seq([p_string(<<"\"">>), p_label('string', p_zero_or_more(p_choose([p_string(<<"\"\"">>), p_seq([p_not(p_string(<<"\"">>)), p_anything()])]))), p_string(<<"\"">>)]))(I,D) end, fun(Node, Idx) ->
                                                                                                                                                                                                                                           String = proplists:get_value(string, Node),
                                                                                                                                                                                                                                           re:replace(String, "[\"]{2}", "\"",[global, {return, binary}])
                                                                                                                                                                                                                                   end).

'field_sep'(Input, Index) ->
    p(Input, Index, 'field_sep', fun(I,D) -> (p_string(<<",">>))(I,D) end, fun(Node, Idx) -> Node end).

'crlf'(Input, Index) ->
    p(Input, Index, 'crlf', fun(I,D) -> (p_seq([p_optional(p_charclass(<<"[\r]">>)), p_charclass(<<"[\n]">>)]))(I,D) end, fun(Node, Idx) -> Node end).








p(Inp, Index, Name, ParseFun) ->
    p(Inp, Index, Name, ParseFun, fun(N, _Idx) -> N end).

p(Inp, StartIndex, Name, ParseFun, TransformFun) ->
    % Grab the memo table from ets
    Memo = get_memo(StartIndex),
    % See if the current reduction is memoized
    case proplists:lookup(Name, Memo) of
        % If it is, return the result
        {Name, Result} -> Result;
        % If not, attempt to parse
        _ ->
            case ParseFun(Inp, StartIndex) of
                % If it fails, memoize the failure
                {fail,_} = Failure ->
                    memoize(StartIndex, [{Name, Failure}|Memo]),
                    Failure;
                % If it passes, transform and memoize the result.
                {Result, InpRem, NewIndex} ->
                    Transformed = TransformFun(Result, StartIndex),
                    memoize(StartIndex, [{Name, {Transformed, InpRem, NewIndex}}|Memo]),
                    {Transformed, InpRem, NewIndex}
            end
    end.

setup_memo() ->
    put(parse_memo_table, ets:new(?MODULE, [set])).

release_memo() ->
    ets:delete(memo_table_name()).

memoize(Position, Struct) ->
    ets:insert(memo_table_name(), {Position, Struct}).

get_memo(Position) ->
    case ets:lookup(memo_table_name(), Position) of
        [] -> [];
        [{Position, PList}] -> PList
    end.

memo_table_name() ->
    get(parse_memo_table).

p_eof() ->
    fun(<<>>, Index) -> {eof, [], Index};
       (_, Index) -> {fail, {expected, eof, Index}} end.

p_optional(P) ->
    fun(Input, Index) ->
            case P(Input, Index) of
                {fail,_} -> {[], Input, Index};
                {_, _, _} = Success -> Success
            end
    end.

p_not(P) ->
    fun(Input, Index)->
            case P(Input,Index) of
                {fail,_} ->
                    {[], Input, Index};
                {Result, _, _} -> {fail, {expected, {no_match, Result},Index}}
            end
    end.

p_assert(P) ->
    fun(Input,Index) ->
            case P(Input,Index) of
                {fail,_} = Failure-> Failure;
                _ -> {[], Input, Index}
            end
    end.

p_and(P) ->
    p_seq(P).

p_seq(P) ->
    fun(Input, Index) ->
            p_all(P, Input, Index, [])
    end.

p_all([], Inp, Index, Accum ) -> {lists:reverse( Accum ), Inp, Index};
p_all([P|Parsers], Inp, Index, Accum) ->
    case P(Inp, Index) of
        {fail, _} = Failure -> Failure;
        {Result, InpRem, NewIndex} -> p_all(Parsers, InpRem, NewIndex, [Result|Accum])
    end.

p_choose(Parsers) ->
    fun(Input, Index) ->
            p_attempt(Parsers, Input, Index, none)
    end.

p_attempt([], _Input, _Index, Failure) -> Failure;
p_attempt([P|Parsers], Input, Index, FirstFailure)->
    case P(Input, Index) of
        {fail, _} = Failure ->
            case FirstFailure of
                none -> p_attempt(Parsers, Input, Index, Failure);
                _ -> p_attempt(Parsers, Input, Index, FirstFailure)
            end;
        Result -> Result
    end.

p_zero_or_more(P) ->
    fun(Input, Index) ->
            p_scan(P, Input, Index, [])
    end.

p_one_or_more(P) ->
    fun(Input, Index)->
            Result = p_scan(P, Input, Index, []),
            case Result of
                {[_|_], _, _} ->
                    Result;
                _ ->
                    {fail, {expected, Failure, _}} = P(Input,Index),
                    {fail, {expected, {at_least_one, Failure}, Index}}
            end
    end.

p_label(Tag, P) ->
    fun(Input, Index) ->
            case P(Input, Index) of
                {fail,_} = Failure ->
                    Failure;
                {Result, InpRem, NewIndex} ->
                    {{Tag, Result}, InpRem, NewIndex}
            end
    end.

p_scan(_, [], Index, Accum) -> {lists:reverse( Accum ), [], Index};
p_scan(P, Inp, Index, Accum) ->
    case P(Inp, Index) of
        {fail,_} -> {lists:reverse(Accum), Inp, Index};
        {Result, InpRem, NewIndex} -> p_scan(P, InpRem, NewIndex, [Result | Accum])
    end.

p_string(S) when is_list(S) -> p_string(list_to_binary(S));
p_string(S) ->
    Length = erlang:byte_size(S),
    fun(Input, Index) ->
            try
                <<S:Length/binary, Rest/binary>> = Input,
                {S, Rest, p_advance_index(S, Index)}
            catch
                error:{badmatch,_} -> {fail, {expected, {string, S}, Index}}
            end
    end.

p_anything() ->
    fun(<<>>, Index) -> {fail, {expected, any_character, Index}};
       (Input, Index) when is_binary(Input) ->
            <<C/utf8, Rest/binary>> = Input,
            {<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)}
    end.

p_charclass(Class) ->
    {ok, RE} = re:compile(Class, [unicode, dotall]),
    fun(Inp, Index) ->
            case re:run(Inp, RE, [anchored]) of
                {match, [{0, Length}|_]} ->
                    {Head, Tail} = erlang:split_binary(Inp, Length),
                    {Head, Tail, p_advance_index(Head, Index)};
                _ -> {fail, {expected, {character_class, Class}, Index}}
            end
    end.

line({{line,L},_}) -> L;
line(_) -> undefined.

column({_,{column,C}}) -> C;
column(_) -> undefined.

p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput)-> % strings
    lists:foldl(fun p_advance_index/2, Index, unicode:characters_to_list(MatchedInput));
p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) -> % single characters
    {{line, Line}, {column, Col}} = Index,
    case MatchedInput of
        $\n -> {{line, Line+1}, {column, 1}};
        _ -> {{line, Line}, {column, Col+1}}
    end.