Sophie

Sophie

distrib > Mageia > 5 > i586 > media > core-release > by-pkgid > 11e2672dfe7ba0b4c4f1129d81751bef > files > 5

python-biopython-doc-1.64-4.mga5.i586.rpm

# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#

from __future__ import print_function

import sys
import os
import time
from reportlab.lib import colors
from reportlab.lib.units import cm

from Bio.Blast.Applications import NcbitblastxCommandline
from Bio.Graphics.GenomeDiagram import Diagram, CrossLink
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO

#Modify this line to point at the Artemis/ACT example data which is online at:
# https://github.com/sanger-pathogens/Artemis/tree/master/etc
input_folder = "/Applications/Artemis/Artemis.app/Contents/artemis/etc"

name = "af063097_v_b132222"
file_a = "af063097.embl"
file_b = "b132222.embl"
format_a = "embl"
format_b = "embl"
file_a_vs_b = "af063097_v_b132222.crunch"

for f in [file_a, file_b, file_a_vs_b]:
    if not os.path.isfile(os.path.join(input_folder, f)):
        print("Missing input file %s.fna" % f)
        sys.exit(1)

#Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc
genomes = [
    (os.path.join(input_folder, file_a), format_a),
    (os.path.join(input_folder, file_b), format_b),
]
comparisons = [os.path.join(input_folder, file_a_vs_b)]

#Create diagram with tracks, each with a feature set
assert len(genomes) >= 2 and len(genomes) == len(comparisons)+1
gd_diagram = Diagram(name, track_size=0.35, circular=False)
tracks = dict()
feature_sets = dict()
records = dict()
for f, format in genomes:
    records[f] = SeqIO.read(f, format)
    tracks[f] = gd_diagram.new_track(1, name=f, start=0, end=len(records[f]),
                                     scale_smalltick_interval=1000,
                                     scale_largetick_interval=10000,
                                     greytrack=True, greytrack_labels=0)
    feature_sets[f] = tracks[f].new_set()

print("Drawing matches...")
for i, crunch_file in enumerate(comparisons):
    q = genomes[i+1][0]  # query file
    s = genomes[i][0]  # subject file
    q_set = feature_sets[q]
    s_set = feature_sets[s]
    handle = open(crunch_file)
    for line in handle:
        if line[0]=="#":
            continue
        parts = line.rstrip("\n").split(None, 7)
        #0 = score
        #1 = id
        #2 = S1
        #3 = E1
        #4 = seq1
        #5 = S2
        #6 = E2
        #7 = seq2
        try:
            q_start, q_end = int(parts[2]), int(parts[3])
            s_start, s_end = int(parts[5]), int(parts[6])
        except IndexError:
            sys.stderr.write(repr(line) + "\n")
            sys.stderr.write(repr(parts) + "\n")
            raise
        flip = False
        if q_start > q_end:
            flip = not flip
            q_start, q_end = q_end, q_start
        if s_start > s_end:
            flip = not flip
            s_start, s_end = s_end, s_start
        if flip:
            c = colors.Color(0, 0, 1, alpha=0.25)
            b = False
        else:
            c = colors.Color(1, 0, 0, alpha=0.25)
            b = False
        q_feature = q_set.add_feature(SeqFeature(FeatureLocation(q_start-1, q_end)),
                                                 color=c, border=b)
        s_feature = s_set.add_feature(SeqFeature(FeatureLocation(s_start-1, s_end)),
                                                 color=c, border=b)
        gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b))
        #NOTE: We are using the same colour for all the matches,
        #with transparency. This means overlayed matches will appear darker.
        #It also means the drawing order not very important.
        #Note ACT puts long hits at the back, and colours by hit score
    handle.close()

print("Drawing CDS features...")
for f, format in genomes:
    record = records[f]
    feature_set = feature_sets[f]
    #Mark the CDS features
    for cds in record.features:
        if cds.type != "CDS":
            continue
        feature_set.add_feature(cds, sigil="ARROW",
                                color=colors.lightblue,
                                border=colors.blue)

gd_diagram.draw(format="linear", fragments=3,
                orientation="landscape", pagesize=(20*cm, 10*cm))
gd_diagram.write(name + ".pdf", "PDF")

gd_diagram.draw(format="circular",
                orientation="landscape", pagesize=(20*cm, 20*cm))
gd_diagram.write(name + "_c.pdf", "PDF")