#!/usr/bin/perl # see http://zfish.nichd.nih.gov/tools/subsequence.cgi # uncomment and modify the next two lines # if your perl is in a nonstandard directory #use lib '/disk3/local/lib/perl5/site_perl'; #use lib '/disk3/local/lib/perl5/'; use CGI qw/:standard :html3/; use Bio::DB::GenBank; use File::Temp; use FileHandle; print header, start_html(-title => 'find subsequence of large GenBank entries',-author => 'Jonathan_Epstein\@nih.gov'); print_form() unless param; print_results() if param; sub print_results { $gb = new Bio::DB::GenBank; $accession = param('accession'); eval { $seq = $gb->get_Seq_by_acc($accession); # Accession Number }; if ($@) { print "***ERROR: accession $accession not found***\n"; return; } $segment_start = param('start'); $segment_end = param('length_or_end_value'); $segment_end = $segment_start+$segment_end-1 if param('length_or_end_choice') eq 'Length'; if ($segment_end<$segment_start || $segment_start<0) { print "***ERROR: invalid segment start and end values:$segment_start,$segment_end***\n"; return; } $len = $seq->length(); if ($segment_end>$len) { print "***ERROR: maximum length $len exceeded***\n"; return; } $subseq = $seq->subseq ($segment_start,$segment_end); $name = "subsequence of $accession"; $strand = "+"; $strand = "-" if (param('reverse')); # For some reason, there seems to be a problem if you use the file # handle provided by File::Temp. Similarly, there's a problem if you # pass a filename to BioPerl below rather than a file handle. However, # constructing our own file handle and then passing it to BioPerl works # fine. (undef, $filename) = File::Temp::tempfile(); $fh = new FileHandle "> $filename"; $seqoutlong = Bio::SeqIO->new( '-format' => 'Fasta',-fh => $fh); $seqobj = Bio::PrimarySeq->new ( -seq => $subseq, -id => $name . "[length:$len]:" . $segment_start . "-" . $segment_end . "(" . $strand . "strand)", -moltype => 'dna' ); $seqobj = $seqobj->revcom if ($strand ne "+"); $seqoutlong->write_seq($seqobj); $fh->close; undef $fh; # Now we parse the FASTA file which was just generated, and perform # some simple conversions to HTML. open (TEMPORARY, "<$filename") or die "unable to open temporary file $filename\n"; print "<tt>\n"; while (<TEMPORARY>) { print $_; print "<br>\n"; } close TEMPORARY; print "</tt>\n"; unlink $filename; } sub print_form { print p("This web page permits you to extract a short subsequence of DNA from a large GenBank entry. This is especially useful in an era of huge \"contigs\" of genomic DNA, where you only want to extract a few hundred base pairs for subsequent analysis.\n"); print p,"This program also illustrates the power of ",a({-href => 'http://www.BioPerl.org/'}, "BioPerl"), ", a powerful set of tools for molecular biology analysis. The ", a({-href => 'subsequence.pl.txt'}, "source code"), " for this program is less than 90 lines long.\n"; print p,"You must specify the GenBank accession number along with a start position. You may specify either the length of the subsequence you wish to extract or, equivalently, the endpoint.\n"; print "The sequence may be reverse-complemented if you wish, e.g., the reverse complement of <font color=green>ATCGC</font> is <font color=yellow>GCGAT</font>.\n"; print p,"To test this web page, try accession NT_004002, start 50000, length 400.\n"; print start_form,table( Tr(td("Enter your GenBank accession"),td(textfield(-name => 'accession',-size => 20))), Tr(td("Start position"),td(textfield(-name => 'start',-size => 10))), Tr(td("Specify length or end position"), td(radio_group (-name => 'length_or_end_choice',-values => [Length, End], default => Length))), Tr(td("Length or end position"), td(textfield (-name => length_or_end_value,-size => 20))), Tr(td("Reverse complement?"), td(checkbox (-name => 'reverse')))), submit ("Find my subsequence"); print hr(),"Credits: Jonathan Epstein (Jonathan_Epstein\@nih.gov)"; }