#!/usr/bin/perl # bioperl.pl # cjm@fruitfly.org use strict; use lib '.'; no strict "vars"; use Data::Dumper; use Bio::Perl; use Bio::SeqIO; use Getopt::Long; my $h = {}; GetOptions($h, "file|f=s", ); my @cmds = get_default_cmds(); shell($h, \@cmds, @ARGV); # prepare for some seriously hacky code.... sub shell { my $h = shift; my @cmds = @{shift || []}; my @args = @_; my $prompt = $ENV{BIOPERL_PROMPT} || "BioPerl> "; my $quit = 0; my @lines = (); my $r; my $rv; my $seq; my @pseqs = (); my $seqio; my $wseqio; my $fastadb; my $options = {echo=>0, chatty=>10}; my $loadfn = $h->{'file'}; if ($loadfn) { @lines = ("load '$loadfn'"); } sub hr { print "\n===============================\n"; } sub nl { print "\n"; } sub demo { if (! -d 't/data') { print "To run the demo, you must be in the bioperl directory\n"; } @lines = split(/\n/, q[ %keep = %$options; +format '' +outformat '' +echo 1 # BioPerl shell utility - Demo # # We're now going to take a tour # through some of the features of # this tool. # # # This demo will go through some of # the major commands, feeding you # the commands as you go. all you have # to do is hit <ENTER> every time # you see the prompt $prompt # you will then see the output of # the command on your terminal window. # type 'q' to end the tour # at any time. # waitenter # PARSING GENBANK RECORDS # ----------------------- # to parse genbank files, use # the read_seq() method, or # simply use the '<' command. # # First of all we're going to # take a look at the file # 't/data/test.genbank' # Let's examine the file itself # using the unix command "cat" # (you can use any unix command # using the ! at the beginning # of a line) ^!cat t/data/test.genbank waitenter # Ok, you can see this is a # typical file of genbank records. # Let's get the first sequence # from the file ^<t/data/test.genbank waitenter # we have parsed the first # record of the file, and placed # the sequence object into # the variable $seq # # if you are familiar with perl # objects and the bioperl object # model, you can interact with # the object; for instance, to # display the residues we use the # seq() method like this: ^print $seq->seq() waitenter # # we can cycle through all the # sequences in the file using # the ',' command. ^, waitenter # this fetched the second sequence # and placed it in the $seq variable # # we can change the output format # by setting the 'outformat' parameter # like this: ^+outformat fasta ^, waitenter # now the sequences are output in # fasta format # to change to embl format: ^+outformat embl ^, waitenter # we can also fetch _all_ seqs from # a file; for this example we will # use t/data/swiss.dat, which is in # swiss format. usually bioperl can guess # the file format from the file extension # but this isn't possible here, so we # must help by setting the input format: ^+format swiss # now lets get all the sequences, like this: ^<*t/data/swiss.dat waitenter # typing <* is equivalent to # using the read_seqs() function, # like this: ^read_seqs('t/data/swiss.dat') waitenter # we now have all the sequences in # the array @seqs # we can write these all out as fasta ^+outformat fasta ^>* # we can also write these out to a file: ^>*myfile.tmp ^!cat myfile.tmp # # RANDOM ACCESS OF FASTA FILES # END +echo 0 %$options = %keep ]); @lines = map { s/^ *//; $_; } @lines; } sub error { if ($error) { print "Error:\n$error"; } else { print "No errors have been reported\n"; } } sub fmt { $options->{format} = shift if @_; print "format=$options->{format}\n"; } # should this move to Bio::Perl ? sub seqio { my $filename = shift; $options->{format} = shift if @_; if( !defined $filename ) { warn "read_sequence($filename) - usage incorrect"; } if( defined $options->{format} ) { $seqio = Bio::SeqIO->new( '-file' => $filename, '-format' => $options->{format}); } else { $seqio = Bio::SeqIO->new( '-file' => $filename); } $seqio; } sub wseqio { my $filename = shift; $options->{format} = shift if @_; my @args = (); if ($filename && $filename !~ /^\>/) { $filename = ">$filename"; } push(@args, -file => "$filename") if $filename; push(@args, -fh => \*STDOUT) unless $filename; push(@args, -format => $options->{outformat}) if $options->{outformat}; $wseqio = Bio::SeqIO->new( @args ); $wseqio; } sub show_seq { return unless $seq; if ($wseqio) { $wseqio->write_seq($seq); } else { printf "seq display id: %s\n", $seq->display_id; } } sub addseq { push(@pseqs, @_); while (scalar(@pseqs) > 50) { # todo - history variable shift @pseqs; } } sub next_seq { if ($seqio) { eval { $seq = $seqio->next_seq; }; if ($@) { $error = $@; print "There was an error getting the seq. Type 'error'\n"; print "for full details\n"; print "(Maybe you have to explicitly set the format?)"; } addseq($seq); } else { print STDERR "use read_seq first\n"; } show_seq; $seq; } sub next_seqs { @seqs = (); if ($seqio) { while ($seq = $seqio->next_seq) { printf "%s\n", $seq->display_id; push(@seqs, $seq); } } $seq = $seqs[$#seqs] if @seqs; @seqs } sub read_seq { seqio(@_); next_seq(); } sub read_seqs { seqio(@_); next_seqs(); } sub write_seq { wseqio(@_); $wseqio->write_seq($seq) if $seq; } sub write_seqs { wseqio(@_); map { $wseqio->write_seq($_) } @seqs; } sub pod { if (!-d "Bio") { print "You need to be in the bioperl directory!\n"; } else { my $mod = shift; unix("pod2text", "Bio/$mod.pm"); } } sub fastadb { require "Bio/DB/Fasta.pm"; my $f = shift; $fastadb = Bio::DB::Fasta->new($f); print "Set \$fastadb object\n"; $fastadb; } sub subseq { if (!$fastadb) { fastadb(shift); } $seq = $fastadb->get_Seq_by_id(shift); if (@_) { printf "%s\n", $seq->subseq(@_); } $seq; } sub load { open(F, shift); @lines = map {chomp;$_} <F>; close(F); } sub waitenter { print "<hit ENTER to continue>"; <STDIN>; } sub showintro { hr; print "This is a text-based commandline interface to BioPerl;\n"; print "\n"; } sub checkoptions { } sub showoptions { my $k = shift; my @k = defined $k ? ($k) : keys %$options; foreach my $ok ($k) { my $v = sprintf("%s", $options->{$k}); if ($v =~ /HASH/) { # hide perl internal details # from user; if they are experienced # perlhackers they can just # type "x $options" to see the # gory details $v = "undisplayable"; } printf("%20s:%s\n", $ok, $b); } } sub set { my ($k,$v) = @_; if (defined($v)) { $options->{$k} = $v; checkoptions; } else { showoptions($k); } # if ($k eq "format") { # seqio; # } if ($k eq "outformat") { wseqio; } } sub echo { my $e = shift; if (defined($e)) { set("echo", $e); } else { set("echo", !$options->{echo}); } } sub options { map {print "$_ = $options->{$_}\n"} keys%$options; } sub showcommands { hr; print "BioPerl Shell Commands:\n"; my $layout = "%5s : %-20s - %s\n"; printf $layout, "cmd", "function", "summary"; printf "%s\n", ("-" x 40); foreach my $c (@cmds) { my $sc = $c->{shortcut}; $sc =~ s/\\//g; printf($layout, $sc, $c->{'func'} . "()", $c->{'summary'} ); } } sub showexamples { print "\nExamples:\n-------\n"; } sub showvariables { hr; print "Shell variables:\n"; print q[ $seq : Bio::SeqI object $seqio : Bio::SeqIO object @pseqs : array of previous Bio::SeqI objects ]; nl; } sub welcome { print "Welcome to the BioPerl shell interface!\n\n"; print "\n\nType 'help' for instructions\n"; print "\n\nType 'demo' for demonstration\n"; print "\n\nThis is ALPHA software - commands may change\n"; print "-lots more commands need to be added to take full\n"; print "advantage of the bioperl functionality\n\n"; } sub help { my $topic = shift; my $c; if ($topic) { ($c) = grep {$_->{func} eq $topic} @cmds; } if ($c) { print "Function: $c->{func}\n"; print "Shortcut: $c->{shortcut}\n" if $c->{shortcut}; print "Summary: $c->{summary}\n" if $c->{summary}; print "=======\n"; print "$c->{docs}\n" if $c->{docs}; } elsif ($topic eq "advanced") { hr; nl; } else { hr; print "\nBioPerl Shell Help\n\n"; showintro; waitenter; showcommands; waitenter; showvariables; waitenter; showexamples; nl; nl; nl; print "Type \"demo\" for an interactive demo of commands\n\n"; print "Type \"help advanced\" for advanced options\n\n"; hr; nl; } } sub p { print shift; print "\n"; } sub x { print Dumper shift; print "\n"; } # trick to allow barewords as keywords... sub advanced {"advanced"} sub unix { my @cmds = @_; my $c = join(" ", @cmds); print `$c`; } welcome; require Term::ReadLine; require Shell; checkoptions; print "\n"; my $termline = shift || Term::ReadLine->new($prompt); my $rcfile = "$ENV{HOME}/.goshellrc"; if (-f $rcfile) { open(F, $rcfile); @lines = <F>; close(F); } my $end_signal = ""; while (!$quit) { if ($end_signal) { @lines = ($lines); while ($end_signal && ($line = $termline->readline("? "))) { if($line !~ /$end_signal/) { $lines[0].= "\n$line"; } else { $end_signal = ""; } } next; } my $line = @lines ? shift @lines : $termline->readline($prompt); if ($line =~ /^\^/) { $line =~ s/^\^//; print "$prompt$line"; my $e = <STDIN>; if ($e =~ /^q/) { $line = ""; @lines = (); } } if ($options->{echo} && $line !~ /\+?wait/) { if ($line =~ /^\#/) { print "$line\n"; } else { print "$prompt$line\n"; } if ($options->{sleep}) { sleep $options->{sleep}; } if ($options->{wait}) { sleep $options->{wait}; } } my ($cmd, @w) = split(' ',$line); $_ = $cmd; if (/^\<\<(.*)/) { $end_signal = $1; } # check for shortcuts my $selected; foreach my $c (@cmds) { my $shortcut = $c->{'shortcut'}; next unless $shortcut; if ($line =~ /^$shortcut(.*)/) { if (!defined($selected) || length($shortcut) > length($selected->{shortcut} || "")) { # get the most specific match $selected = $c; } } } if ($selected) { my $shortcut = $selected->{'shortcut'}; if ($line =~ /^$shortcut(.*)/) { my @w = map {"'".$_."'" } split(' ', $1); $line = $selected->{'func'}." ".join(", ", @w); } } $rv = eval $line; # print "\n"; # print "RV=$rv;;;\n"; if ($@) { print STDERR $@; } if ($options->{sleep}) { sleep $options->{sleep}; } if ($options->{wait}) { sleep $options->{wait}; $options->{wait} = 0; } } } sub get_default_cmds { my @cmds = ( { func => 'read_seq', shortcut => '\<', summary => 'read a Seq from a file', }, { func => 'next_seq', shortcut => ',', summary => 'get the next Seq', }, { func => 'read_seqs', shortcut => '\<\*', summary => 'read all Seqs from a file', }, { func => 'write_seq', shortcut => '\>', summary => 'write a Seq to screen/file', }, { func => 'write_seqs', shortcut => '\>\*', summary => 'write a Seq to screen/file', }, { func => 'fastadb', shortcut => 'fa', summary => 'fast fasta access', }, { func => 'subseq', summary => 'get a subseq from a fastadb', }, { func => 'set', shortcut => '\+', summary => 'set a shell parameter', }, { func => 'unix', shortcut => '\!', summary => 'run a unix command', }, { func => 'x', summary => 'display variable (and internals) using dumper', }, ); return @cmds; }