From: Zeeshan Lakhani <zeeshan.lakhani@gmail.com> Date: Wed, 16 Mar 2016 16:44:31 -0400 Subject: [PATCH] - remove need for bisect lib/hyper_bisect as it requires Hipe, and we won't force that usecase for now. diff --git a/rebar-test.config b/rebar-test.config index 170844e..160b729 100644 --- a/rebar-test.config +++ b/rebar-test.config @@ -2,8 +2,6 @@ {deps, [ {basho_stats, "", {git, "https://github.com/knutin/basho_stats.git", {branch, "master"}}}, - {bisect, "", - {git, "https://github.com/knutin/bisect.git", {branch, "master"}}}, {proper, "", {git,"https://github.com/manopapad/proper.git", "master"}}, {stdlib2, "", diff --git a/rebar.config b/rebar.config index 5cf51a1..e09519a 100644 --- a/rebar.config +++ b/rebar.config @@ -1,7 +1,6 @@ {cover_enabled, true}. {deps, [ - {bisect, "", - {git, "https://github.com/knutin/bisect.git", {branch, "master"}}} + {proper, "", {git,"https://github.com/manopapad/proper.git", "master"}} ]}. diff --git a/src/hyper.erl b/src/hyper.erl index e7c80f2..a8f2fab 100644 --- a/src/hyper.erl +++ b/src/hyper.erl @@ -311,7 +311,7 @@ perf_report() -> Ps = [15], Cards = [1, 100, 500, 1000, 2500, 5000, 10000, 15000, 25000, 50000, 100000, 1000000], - Mods = [hyper_gb, hyper_array, hyper_bisect, hyper_binary, hyper_carray], + Mods = [hyper_gb, hyper_array, hyper_binary, hyper_carray], Repeats = 10, Time = fun (F, Args) -> diff --git a/src/hyper_bisect.erl b/src/hyper_bisect.erl deleted file mode 100644 index 2c31521..0000000 --- a/src/hyper_bisect.erl +++ /dev/null @@ -1,250 +0,0 @@ --module(hyper_bisect). --include_lib("eunit/include/eunit.hrl"). --behaviour(hyper_register). - --export([new/1, - set/3, - max_merge/1, - max_merge/2, - reduce_precision/2, - bytes/1, - register_sum/1, - zero_count/1, - encode_registers/1, - decode_registers/2, - compact/1]). - - --define(KEY_SIZE, 16). --define(VALUE_SIZE, 8). - -%% -%% BEHAVIOUR -%% - -new(P) -> - DenseSize = trunc(math:pow(2, P)), - EntrySize = (?KEY_SIZE + ?VALUE_SIZE) div 8, - Threshold = DenseSize div EntrySize, - {sparse, bisect:new(?KEY_SIZE div 8, ?VALUE_SIZE div 8), P, Threshold}. - - -set(Index, Value, {sparse, B, P, Threshold}) -> - V = <<Value:?VALUE_SIZE/integer>>, - UpdateF = fun (OldValue) when OldValue >= V -> OldValue; - (OldValue) when OldValue < V -> V - end, - NewB = bisect:update(B, <<Index:?KEY_SIZE/integer>>, V, UpdateF), - - case bisect:num_keys(NewB) < Threshold of - true -> - {sparse, NewB, P, Threshold}; - false -> - {dense, bisect2dense(NewB, P)} - end; - -set(Index, Value, {dense, B} = D) -> - case binary:at(B, Index) of - R when R > Value -> - D; - _ -> - <<Left:Index/binary, _:?VALUE_SIZE, Right/binary>> = B, - {dense, iolist_to_binary([Left, <<Value:?VALUE_SIZE/integer>>, Right])} - end. - - - -fold(F, Acc, {sparse, B, _, _}) -> - InterfaceF = fun (<<Index:?KEY_SIZE/integer>>, - <<Value:?VALUE_SIZE/integer>>, - A) -> - F(Index, Value, A) - end, - bisect:foldl(B, InterfaceF, Acc); -fold(F, Acc, {dense, B}) -> - do_dense_fold(F, Acc, B). - - -max_merge(Registers) -> - [First | Rest] = Registers, - lists:foldl(fun max_merge/2, First, Rest). - - - -max_merge({sparse, Small, P, T}, {sparse, Big, P, T}) -> - ToInsert = bisect:foldl(Small, - fun (Index, Value, Acc) -> - case bisect:find(Big, Index) of - not_found -> - [{Index, Value} | Acc]; - V when V >= Value -> - Acc; - V when V < Value -> - [{Index, Value} | Acc] - end - end, - []), - Merged = bisect:bulk_insert(Big, lists:sort(ToInsert)), - {sparse, Merged, P, T}; - - -max_merge({dense, Left}, {dense, Right}) -> - Merged = iolist_to_binary( - do_dense_merge(Left, Right)), - {dense, Merged}; - -max_merge({dense, Dense}, {sparse, Sparse, P, _}) -> - {dense, iolist_to_binary( - do_dense_merge(Dense, bisect2dense(Sparse, P)))}; - -max_merge({sparse, Sparse, P, _}, {dense, Dense}) -> - {dense, iolist_to_binary( - do_dense_merge(Dense, bisect2dense(Sparse, P)))}. - -reduce_precision(_NewP, _B) -> - throw(not_implemented). - -compact(B) -> - B. - -bytes({sparse, Sparse, _, _}) -> bisect:size(Sparse); -bytes({dense, Dense}) -> erlang:byte_size(Dense). - - -register_sum(B) -> - M = case B of - {dense, Bytes} -> erlang:byte_size(Bytes); - {sparse, _, P, _} -> trunc(math:pow(2, P)) - end, - - {MaxI, Sum} = fold(fun (Index, Value, {I, Acc}) -> - Zeros = Index - I - 1, - {Index, Acc + math:pow(2, -Value) - + (math:pow(2, -0) * Zeros)} - end, {-1, 0}, B), - Sum + (M - 1 - MaxI) * math:pow(2, -0). - -zero_count({dense, _} = B) -> - fold(fun (_, 0, Acc) -> Acc + 1; - (_, _, Acc) -> Acc - end, 0, B); -zero_count({sparse, B, P, _}) -> - M = trunc(math:pow(2, P)), - M - bisect:num_keys(B). - -encode_registers({dense, B}) -> - B; -encode_registers({sparse, B, P, _}) -> - M = trunc(math:pow(2, P)), - iolist_to_binary( - do_encode_registers(M-1, B, [])). - - - -decode_registers(AllBytes, P) -> - M = DenseSize = trunc(math:pow(2, P)), - EntrySize = (?KEY_SIZE + ?VALUE_SIZE) div 8, - Threshold = DenseSize div EntrySize, - - Bytes = case AllBytes of - <<B:M/binary>> -> B; - <<B:M/binary, 0>> -> B - end, - - L = do_decode_registers(Bytes, 0), - case length(L) < Threshold of - true -> - New = bisect:new(?KEY_SIZE div 8, ?VALUE_SIZE div 8), - {sparse, bisect:from_orddict(New, L), P, Threshold}; - false -> - {dense, Bytes} - end. - - -%% -%% INTERNAL -%% - -do_dense_merge(<<>>, <<>>) -> - []; -do_dense_merge(<<Left:?VALUE_SIZE/integer, LeftRest/binary>>, - <<Right:?VALUE_SIZE/integer, RightRest/binary>>) -> - [max(Left, Right) | do_dense_merge(LeftRest, RightRest)]. - - - -do_dense_fold(F, Acc, B) -> - do_dense_fold(F, Acc, B, 0). - -do_dense_fold(_, Acc, <<>>, _) -> - Acc; -do_dense_fold(F, Acc, <<Value, Rest/binary>>, Index) -> - do_dense_fold(F, F(Index, Value, Acc), Rest, Index+1). - - - -bisect2dense(B, P) -> - M = trunc(math:pow(2, P)), - {LastI, L} = lists:foldl(fun ({<<Index:?KEY_SIZE/integer>>, V}, {I, Acc}) -> - Index < M orelse throw(index_too_high), - - Fill = case Index - I of - 0 -> - []; - ToFill -> - binary:copy(<<0>>, ToFill - 1) - end, - - {Index, [V, Fill | Acc]} - - end, {-1, []}, bisect:to_orddict(B)), - %% Fill last - Filled = [binary:copy(<<0>>, M - LastI - 1) | L], - - iolist_to_binary(lists:reverse(Filled)). - -do_encode_registers(I, _B, ByteEncoded) when I < 0 -> - ByteEncoded; -do_encode_registers(I, B, ByteEncoded) when I >= 0 -> - Byte = case bisect:find(B, <<I:?KEY_SIZE/integer>>) of - not_found -> - <<0>>; - V -> - V % already encoded - end, - do_encode_registers(I - 1, B, [Byte | ByteEncoded]). - - -do_decode_registers(<<>>, _) -> - []; -do_decode_registers(<<0, Rest/binary>>, I) -> - do_decode_registers(Rest, I+1); -do_decode_registers(<<Value:?VALUE_SIZE, Rest/binary>>, I) -> - [{<<I:?KEY_SIZE/integer>>, <<Value:?VALUE_SIZE/integer>>} - | do_decode_registers(Rest, I+1)]. - - - -%% -%% TESTS -%% - -bisect2dense_test() -> - P = 4, - M = 16, % pow(2, P) - - {sparse, B, _, _} = - lists:foldl(fun ({I, V}, Acc) -> - set(I, V, Acc) - end, new(P), [{2, 1}, {1, 1}, {4, 4}, {15, 5}]), - - Dense = bisect2dense(B, P), - ?assertEqual(M, size(Dense)), - - - Expected = <<0, 1, 1, 0, - 4, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 5>>, - ?assertEqual(M, size(Expected)), - ?assertEqual(Expected, Dense). diff --git a/test/hyper_test.erl b/test/hyper_test.erl index 18d2a87..08405d6 100644 --- a/test/hyper_test.erl +++ b/test/hyper_test.erl @@ -28,13 +28,11 @@ hyper_test_() -> ?_test(many_union_t()), ?_test(union_t()), ?_test(union_mixed_precision_t()), - ?_test(small_big_union_t()), ?_test(intersect_card_t()), ?_test(bad_serialization_t()), {"Union property with hyper_carry", RunProp(prop_union(hyper_carray))}, {"Union property with hyper_binary", RunProp(prop_union(hyper_binary))}, {"Union property with hyper_array", RunProp(prop_union(hyper_array))}, - {"Union property with hyper_bisect", RunProp(prop_union(hyper_bisect))}, {"Union property with hyper_gb", RunProp(prop_union(hyper_gb))}, RunProp(prop_set()), RunProp(prop_serialize()) @@ -89,13 +87,11 @@ backend_t() -> Gb = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_gb))), Array = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_array))), - Bisect = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_bisect))), Binary = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_binary))), Carray = hyper:compact(hyper:insert_many(Values, hyper:new(P, hyper_carray))), {hyper_gb , GbRegisters} = Gb#hyper.registers, {hyper_array , ArrayRegisters} = Array#hyper.registers, - {hyper_bisect, BisectRegisters} = Bisect#hyper.registers, {hyper_binary, BinaryRegisters} = Binary#hyper.registers, {hyper_carray, CarrayRegisters} = Carray#hyper.registers, @@ -126,25 +122,21 @@ backend_t() -> ?assertEqual(ExpectedBytes, hyper_gb:encode_registers(GbRegisters)), ?assertEqual(ExpectedBytes, hyper_array:encode_registers(ArrayRegisters)), - ?assertEqual(ExpectedBytes, hyper_bisect:encode_registers(BisectRegisters)), ?assertEqual(ExpectedBytes, hyper_binary:encode_registers(BinaryRegisters)), ?assertEqual(ExpectedBytes, hyper_carray:encode_registers(CarrayRegisters)), ?assertEqual(hyper:card(Gb), hyper:card(hyper:from_json(hyper:to_json(Array), hyper_gb))), ?assertEqual(Array, hyper:from_json(hyper:to_json(Array), hyper_array)), - ?assertEqual(Bisect, hyper:from_json(hyper:to_json(Array), hyper_bisect)), ?assertEqual(Binary, hyper:from_json(hyper:to_json(Binary), hyper_binary)), ?assertEqual(Carray, hyper:from_json(hyper:to_json(Carray), hyper_carray)), ?assertEqual(hyper:to_json(Gb), hyper:to_json(Array)), - ?assertEqual(hyper:to_json(Gb), hyper:to_json(Bisect)), ?assertEqual(hyper:to_json(Gb), hyper:to_json(Binary)), ?assertEqual(hyper:to_json(Gb), hyper:to_json(Carray)), ?assertEqual(hyper:card(Gb), hyper:card(Array)), - ?assertEqual(hyper:card(Gb), hyper:card(Bisect)), ?assertEqual(hyper:card(Gb), hyper:card(Binary)), ?assertEqual(hyper:card(Gb), hyper:card(Carray)). @@ -296,27 +288,6 @@ union_mixed_precision_t() -> || Mod <- [hyper_binary]]. -small_big_union_t() -> - random:seed(1, 2, 3), - SmallCard = 100, - BigCard = 15000, % switches to dense at 10922 items - - SmallSet = sets:from_list(generate_unique(SmallCard)), - BigSet = sets:from_list(generate_unique(BigCard)), - - SmallHyper = hyper:insert_many(sets:to_list(SmallSet), - hyper:new(15, hyper_bisect)), - BigHyper = hyper:insert_many(sets:to_list(BigSet), - hyper:new(15, hyper_bisect)), - ?assertMatch({hyper_bisect, {sparse, _, _, _}}, SmallHyper#hyper.registers), - ?assertMatch({hyper_bisect, {dense, _}}, BigHyper#hyper.registers), - - UnionHyper = hyper:union(SmallHyper, BigHyper), - TrueUnion = sets:size(sets:union(SmallSet, BigSet)), - ?assert(abs(hyper:card(UnionHyper) - TrueUnion) < TrueUnion * 0.01). - - - intersect_card_t() -> random:seed(1, 2, 3), @@ -379,7 +350,7 @@ bad_serialization_t() -> %% backends() -> - [hyper_gb, hyper_array, hyper_bisect, hyper_binary, hyper_carray]. + [hyper_gb, hyper_array, hyper_binary, hyper_carray]. gen_values() ->