#!/usr/bin/perl
#
# Fetch sequence data via OBDA registry system
#
# usage: rfetch -i <file_with_accession_list> -a -v -d embl -s start -e end
#
# Identifiers are taken from the file given with -i (the first
# whitespace-separated token of every non-blank line) or, when -i is not
# given, from the remaining command-line arguments.  Every sequence that is
# found is written to STDOUT in the format selected with -f.
#
#   -d, --database  registry database name         (default: embl_biosql)
#   -f, --format    Bio::SeqIO output format        (default: fasta)
#   -i, --input     file listing one identifier per line
#   -a, --acc       look identifiers up with get_Seq_by_acc instead of
#                   get_Seq_by_id
#   -s, --start     \ when BOTH are given, each sequence is truncated to
#   -e, --end       / this range before being written
#   -v, --verbose   report every fetch on STDERR
#
# Exit status is non-zero when options are invalid, the database or input
# file cannot be opened, or at least one identifier could not be fetched.
#

use strict;
use warnings;

use Bio::DB::Registry;
use Bio::SeqIO;
use Getopt::Long;

my $usage = "usage: $0 [-i file] [-a] [-v] [-d database] [-f format] "
          . "[-s start -e end] [id ...]\n";

my $database = 'embl_biosql';
my $start    = undef;
my $end      = undef;
my $format   = 'fasta';
my $file     = undef;
my $acc      = undef;
my $verbose  = undef;

# GetOptions returns false on unknown or malformed options; stop instead of
# silently running with a partially parsed command line.
GetOptions(
    'd|database:s' => \$database,
    's|start:i'    => \$start,
    'e|end:i'      => \$end,
    'f|format:s'   => \$format,
    'i|input:s'    => \$file,
    'a|acc'        => \$acc,
    'v|verbose'    => \$verbose,
) or die $usage;

# Truncation only happens when both ends of the range are supplied; tell the
# user when a lone -s or -e is going to be ignored.
if ( defined $start xor defined $end ) {
    warn "both -s and -e are required to truncate; ignoring partial range\n";
}

my $registry = Bio::DB::Registry->new();
my $db       = $registry->get_database($database);

# Guard against an undefined handle so the failure names the database rather
# than surfacing later as "Can't call method ... on an undefined value".
defined $db
    or die "cannot get database '$database' from the registry\n";

my $seqout = Bio::SeqIO->new(
    '-format' => $format,
    '-fh'     => \*STDOUT,
);

my @ids;

if ( defined $file ) {
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as a mode or a pipe, and the handle is scoped to this block.
    open( my $fh, '<', $file ) or die "cannot open $file $!";
    while ( my $line = <$fh> ) {
        my ($id) = split ' ', $line;
        next unless defined $id;    # blank / whitespace-only line
        push @ids, $id;
    }
    close $fh;
}
else {
    @ids = @ARGV;
}

my $missing = 0;

foreach my $id (@ids) {
    if ($verbose) {
        print STDERR "fetching $id\n";
    }

    my $seq = $acc
        ? $db->get_Seq_by_acc($id)
        : $db->get_Seq_by_id($id);

    # A failed lookup must not abort the whole batch: report it, remember
    # that something went wrong, and carry on with the next identifier.
    if ( !defined $seq ) {
        warn "could not fetch $id from $database\n";
        $missing++;
        next;
    }

    if ( defined $start && defined $end ) {
        $seq = $seq->trunc( $start, $end );
    }

    $seqout->write_seq($seq);
}

exit( $missing ? 1 : 0 );