#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Data::Dumper;
use List::Util qw(shuffle sum max);
use Time::HiRes qw(gettimeofday tv_interval);
use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../t/lib";
use BitStreamTest;
use POSIX;

# Benchmark script: builds one list of geometrically distributed integers,
# then encodes and decodes it with every code named in @encodings, printing
# the encoded size and the elapsed encode/decode time for each.

# First argument handed to stream_encode_array
# (presumably selects the stream implementation -- confirm in BitStreamTest).
my $type = 'wordvec';

# Time with small, big, and mixed numbers.

# ceillog2($v)
#
# Returns ceil(log2($v)) for $v >= 2, and 1 for $v == 1 (the result is never
# less than 1).  Used below to size a fixed-width binary representation.
sub ceillog2 {
    my $v = shift;
    $v--;
    my $bits = 1;    # not named $b: that would shadow sort's package variable
    $bits++ while ($v >>= 1);
    return $bits;
}

# Encoding specifications, in the string form parsed by the BitStreamTest
# helpers, that will be timed.
my @encodings = qw|
    gamma
    bvzeta(2)
    fib
    ber
    varint
    deltagol(11)
    arice(0)
    omegagol(11)
    ss(3-1-3)
    gg(6)
    eg(3)
    sss(3-1-10)
    baer(1)
    golomb(6)
    ss(3-0-0-1-3)
    rice(3)
|;

my $list_n = 10000;
my @list;

# Fixed seed so the same seed is used on every run.
srand(15);

# rand_geo($param, $N)
#
# Draws one geometrically distributed integer with parameter $param by
# inverting the CDF: floor(log(1-u) / log(1-$param)) + 1 for uniform u in
# [0,1).  The result is at least 1 and is clamped to at most $N.
sub rand_geo {
    my ($param, $N) = @_;

    # Inspired by Bio::Tools::RandomDistFunctions (Jason Stajich, Mike Sanderson)
    # Any misuse of their function is purely my fault.
    my $den;
    if ($param < 1e-8) {
        # For tiny $param use the first two terms of the series for
        # log(1 - $param) instead of calling log() directly.
        $den = (-1 * $param) - ($param * $param) / 2;
    }
    else {
        $den = log(1 - $param);
    }

    my $z = log(1 - rand(1)) / $den;
    $z = POSIX::floor($z) + 1;
    $z = $N if $z > $N;
    return $z;
}

# _startstop_param_sets($count, $budget)
#
# Returns, in lexicographic order, every array ref of $count non-negative
# integers whose sum is at most $budget.  With $budget == 8 this yields the
# same tuples, in the same order, as $count nested "0 .. 8" loops filtered by
# "sum <= 8".
sub _startstop_param_sets {
    my ($count, $budget) = @_;
    return ([]) if $count == 0;

    my @sets;
    for my $first (0 .. $budget) {
        push @sets,
            map { [ $first, @{$_} ] }
            _startstop_param_sets($count - 1, $budget - $first);
    }
    return @sets;
}

{
    push @list, rand_geo(0.1, 65535) for (1 .. $list_n);
}
print "List holds ", scalar @list, " numbers\n";

#@list = shuffle(@list);

# average value
my $avg = int((sum @list) / scalar @list);

# bytes required in fixed size (FOR encoding); bits are rounded up to whole
# bytes, matching how time_list reports encoded sizes.
my $bytes = int((ceillog2(max @list) * scalar(@list) + 7) / 8);

#push @encodings, 'golomb(' . int(0.69 * $avg) . ')';

# Disabled exhaustive search for the start/stop parameter set (2 to 5
# parameters, summing to at most 8) that gives the shortest encoding of @list.
# Change the condition to 1 to run it.
if (0) {
    my $minsize = 140000;
    my $maxval  = max @list;

    for my $count (2 .. 5) {
        for my $params (_startstop_param_sets($count, 8)) {
            # Skip parameter sets that cannot represent the largest value.
            next unless BitStream::Code::StartStop::max_code_for_startstop($params) >= $maxval;

            my $name   = 'ss(' . join('-', @{$params}) . ')';
            my $stream = stream_encode_array($type, $name, @list);
            my $len    = $stream->len;
            if ($len < $minsize) {
                print "new min: $len $name\n";
                $minsize = $len;
            }
        }
    }
}

print "List (avg $avg, max ", max(@list), ", $bytes binary):\n";

time_list($_, @list) for (@encodings);

# time_list($encoding, @values)
#
# Encodes @values with $encoding, decodes the resulting stream, verifies the
# round trip, and prints one report line with the encoded size in bytes and
# the encode/decode times in microseconds.
#
# Dies if the stream could not be built, if the decoder returns a different
# number of values than were encoded, or if any decoded value differs.
# Returns 1 on success.
sub time_list {
    my $encoding = shift;
    my @list     = @_;

    my $s1     = [gettimeofday];
    my $stream = stream_encode_array($type, $encoding, @list);
    die "Stream ($encoding) construction failure" unless defined $stream;
    my $e1  = int(tv_interval($s1) * 1_000_000);
    my $len = $stream->len;

    my $s2      = [gettimeofday];
    my @decoded = stream_decode_array($encoding, $stream);
    my $e2      = int(tv_interval($s2) * 1_000_000);

    # Check the count first: comparing element by element alone would miss
    # surplus decoded values and would compare against undef for missing ones.
    die "incorrect $encoding coding: decoded " . scalar(@decoded)
        . " values, expected " . scalar(@list)
        unless @decoded == @list;

    foreach my $i (0 .. $#list) {
        die "incorrect $encoding coding for $i" if $decoded[$i] != $list[$i];
    }

    printf " %-14s: %8d bytes %8d uS encode %8d uS decode\n",
        $encoding, int(($len + 7) / 8), $e1, $e2;
    return 1;
}