#!/pro/bin/perl # ss-dup-tk.pl: Find dups in spreadsheet # (m)'09 [23-01-2009] Copyright H.M.Brand 2005-2015 use strict; use warnings; sub usage { my $err = shift and select STDERR; print "usage: $0 [-t] [-S <sheets>] [-R <rows>] [-C columns] [-F <fields>]\n", "\t-t Only check on true values\n", "\t-S sheets Check sheet(s). Defaul = 1, 1,3-5,all\n", "\t-R rows Check row(s). Defaul = all, 6,19-66\n", "\t-C columns Check column(s). Defaul = all, 2,5-9\n", "\t-F fields Check field(s). Defaul = all, A1,A2,B15,C23\n"; exit $err; } # usage use Spreadsheet::Read; use Getopt::Long qw(:config bundling nopermute noignorecase); my $opt_v = 0; my $opt_t = 0; # Only check on true values my @opt_S; # Sheets to print my @opt_R; # Rows to print my @opt_C; # Columns to print my @opt_F; GetOptions ( "help|?" => sub { usage (0); }, "S|sheets=s" => \@opt_S, "R|rows=s" => \@opt_R, "C|columns=s" => \@opt_C, "F|fields=s" => \@opt_F, "t|true" => \$opt_t, "v|verbose:1" => \$opt_v, ) or usage (1); @opt_S or @opt_S = (1); use Tk; use Tk::ROText; my $file = shift || (sort { -M $b <=> -M $a } glob "*.xls")[0]; my ($mw, $is, $ss, $dt) = (MainWindow->new, "1.0"); sub ReadFile { $file or return; $dt->delete ("1.0", "end"); unless ($ss = ReadData ($file)) { $dt->insert ("end", "Cannot read $file as spreadsheet\n"); return; } my @ss = map { qq{"$ss->[$_]{label}"} } 1 .. $ss->[0]{sheets}; my @finfo = ( "File: $file", ( map { "Sheet $_: '$ss->[$_]{label}'\t($ss->[$_]{maxcol} x $ss->[$_]{maxrow})" } 1 .. $ss->[0]{sheets} ), "=============================================================="); $dt->insert ("end", join "\n", @finfo, ""); $is = (@finfo + 1).".0"; return $ss; } # ReadFile my $tf = $mw->Frame ()->pack (qw( -side top -anchor nw -expand 1 -fill both )); $tf->Entry ( -textvariable => \$file, -width => 40, -vcmd => \&ReadFile, )->pack (qw(-side left -expand 1 -fill both)); my %ftyp; for ([ xls => [ "Excel Files", [qw( .xls .XLS )] ] ], [ xlsx => [ "Excel Files", [qw( .xlsx .XLSX )] ] ], [ sxc => [ "OpenOffice Files", [qw( .sxc .SXC )] ] ], [ ods => [ "OpenOffice Files", [qw( .ods .ODS )] ] ], [ csv => [ "CSV Files", [qw( .csv .CSV )] ] ], ) { my ($ft, $r) = @$_; Spreadsheet::Read::parses ($ft) or next; push @{$ftyp{$r->[0]}}, @{$r->[1]}; push @{$ftyp{"All spreadsheet types"}}, @{$r->[1]}; } $tf->Button ( -text => "Select file", -command => sub { $ss = undef; $file = $mw->getOpenFile ( -filetypes => [ ( map { [ $_, $ftyp{$_} ] } sort keys %ftyp ), [ "All files", "*" ], ], ); ReadFile (); }, )->pack (qw(-side left -expand 1 -fill both)); $tf->Button ( -text => "Detect", -command => \&Detect, )->pack (qw(-side left -expand 1 -fill both)); $tf->Button ( -text => "Show", -command => \&Show, )->pack (qw(-side left -expand 1 -fill both)); $tf->Button ( -text => "Exit", -command => \&exit, )->pack (qw(-side left -expand 1 -fill both)); my $mf = $mw->Frame ()->pack (qw( -side top -anchor nw -expand 1 -fill both )); my $sw = $mf->Scrolled ("ROText", -scrollbars => "osoe", -height => 40, -width => 85, -foreground => "Black", -background => "White", -highlightthickness => 0, -setgrid => 1)->pack (qw(-expand 1 -fill both)); $dt = $sw->Subwidget ("scrolled"); #$sw->Subwidget ("xscrollbar")->packForget; $dt->configure ( -wrap => "none", -font => "mono 12", ); my $bf = $mw->Frame ()->pack (qw( -side top -anchor nw -expand 1 -fill both )); $bf->Checkbutton ( -variable => \$opt_t, -text => "True values only", )->pack (qw(-side left -expand 1 -fill both)); { my $opt_S = @opt_S ? join ",", @opt_S : 1; $bf->Label ( -text => "Sheet(s)", )->pack (qw(-side left -expand 1 -fill both)); $bf->Entry ( -textvariable => \$opt_S, -width => 10, -validate => "focusout", -vcmd => sub { @opt_S = grep m/\S/, split m/\s*,\s*/ => $opt_S; 1; }, )->pack (qw(-side left -expand 1 -fill both)); } { my $opt_R = join ",", @opt_R; $bf->Label ( -text => "Rows(s)", )->pack (qw(-side left -expand 1 -fill both)); $bf->Entry ( -textvariable => \$opt_R, -width => 10, -validate => "focusout", -vcmd => sub { @opt_R = grep m/\S/, split m/\s*,\s*/ => $opt_R; 1; }, )->pack (qw(-side left -expand 1 -fill both)); } { my $opt_C = join ",", @opt_C; $bf->Label ( -text => "Columns(s)", )->pack (qw(-side left -expand 1 -fill both)); $bf->Entry ( -textvariable => \$opt_C, -width => 10, -validate => "focusout", -vcmd => sub { @opt_C = grep m/\S/, split m/\s*,\s*/ => $opt_C; 1; }, )->pack (qw(-side left -expand 1 -fill both)); } sub ranges (@) { my @g; foreach my $arg (@_) { for (split m/,/, $arg) { if (m/^(\w+)\.\.(\w+)$/) { my ($s, $e) = ($1, $2); $s =~ m/^[1-9]\d*$/ or ($s, $e) = (qq("$s"), qq("$e")); eval "push \@g, $s .. $e"; } else { push @g, $_; } } } $opt_v and print STDERR "( @g )\n"; @g; } # ranges sub Detect { $ss or ReadFile (); $dt->delete ($is, "end"); $dt->insert ("end", join "\n", "", "Shts: @opt_S", "Rows: @opt_R", "Cols: @opt_C", "--------------------------------------------------------------", ""); my %done; my @S = $opt_S[0] eq "all" ? (1 .. $ss->[0]{sheets}) : ranges (@opt_S); my @R = ranges (@opt_R); my @C = ranges (@opt_C); my %f = map { uc $_ => 1 } ("@opt_F" =~ m/(\b[A-Z]\d+\b)/ig); foreach my $s (@S) { my $xls = $ss->[$s] or die "Cannot read sheet $s\n"; my @r = @R ? @R : (1 .. $xls->{maxrow}); my @c = @C ? @C : (1 .. $xls->{maxcol}); foreach my $r (@r) { foreach my $c (@c) { defined $xls->{cell}[$c][$r] or next; my $v = uc $xls->{cell}[$c][$r]; my $cell = cr2cell ($c, $r); @S > 1 and $cell = $xls->{label} . "[$cell]"; $opt_t && !$v and next; @opt_F && !exists $f{$cell} and next; if (exists $done{$v}) { $dt->insert ("end", sprintf "Cell %-5s is dup of %-5s '%s'\n", $cell, $done{$v}, $v); next; } $done{$v} = $cell; } } } } # Detect sub Show { $ss or ReadFile (); $dt->delete ($is, "end"); $dt->insert ("end", join "\n", "", "Shts: @opt_S", "Rows: @opt_R", "Cols: @opt_C"); my @S = $opt_S[0] eq "all" ? (1 .. $ss->[0]{sheets}) : ranges (@opt_S); my @R = ranges (@opt_R); my @C = ranges (@opt_C); my %f = map { uc $_ => 1 } ("@opt_F" =~ m/(\b[A-Z]\d+\b)/ig); foreach my $s (@S) { my $xls = $ss->[$s] or die "Cannot read sheet $s\n"; $dt->insert ("end", "\n--------------------------------------------------------------". "\nSheet $s: '$xls->{label}'\t($xls->{maxcol} x $xls->{maxrow})\n"); my @r = @R ? @R : (1 .. $xls->{maxrow}); my @c = @C ? @C : (1 .. $xls->{maxcol}); $dt->insert ("end", " |"); for (@c) { (my $ch = cr2cell ($_, 1)) =~ s/1$//; $dt->insert ("end", sprintf "%11s |", $ch); } $dt->insert ("end", "\n-----+"); $dt->insert ("end", "------------+") for @c; foreach my $r (@r) { $dt->insert ("end", sprintf "\n%4d |", $r); foreach my $c (@c) { my $cell = cr2cell ($c, $r); my $v = defined $xls->{cell}[$c][$r] ? $xls->{$cell} : "--"; length ($v) < 12 and substr $v, 0, 0, " " x (12 - length $v); $dt->insert ("end", substr ($v, 0, 12). "|"); } } } } # Show MainLoop;