## graph10.py
# Plot weight hydrophobity values for a sequence
# Use a triangle average of width 19
# The bR structure is known so highlight the secondary structure features; from
# http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=66360541
import fasta_reader
from pylab import *
# Kyte & Doolittle index of hydrophobicity
kd = { 'A': 1.8,'R':-4.5,'N':-3.5,'D':-3.5,'C': 2.5,
'Q':-3.5,'E':-3.5,'G':-0.4,'H':-3.2,'I': 4.5,
'L': 3.8,'K':-3.9,'M': 1.9,'F': 2.8,'P':-1.6,
'S':-0.8,'T':-0.7,'W':-0.9,'Y':-1.3,'V': 4.2 }
infile = open("bacteriorhodopsin.fasta")
record = fasta_reader.read_fasta_record(infile)
num_residues = len(record.sequence)
values = []
for residue in record.sequence:
values.append(kd[residue])
# I've computed the values for each residue. Now compute the moving
# average for a window size of 19. Transmembrane helixes are often
# about 20 residues in length so peaks in this plot suggest
# transmembrane regions.
window_size = 19
half_window = (window_size-1)/2
# Precompute the offsets list for better performance.
offsets = range(-half_window, half_window+1)
weights = [1,2,3,4,5,6,7,8,9,10,9,8,7,6,5,4,3,2,1]
total_weight = sum(weights)
y_data = []
for i in range(half_window, num_residues-half_window):
weighted_value = 0.0
for offset, weight in zip(offsets, weights):
weighted_value += weight*values[i+offset]
y_data.append(weighted_value / total_weight)
# Exclude the first and last residues.
# The final list is in biologist coordinates (the first residue is
# index 1)
x_data = range(half_window, half_window+len(y_data))
plot(x_data, y_data, linewidth=1.0)
# Draw a reasonable cutoff for membrane prediction
# Value of 1.6 taken from
# http://arbl.cvmbs.colostate.edu/molkit/hydropathy/
axhline(y=1.6)
# Draw the known helix and sheet ranges
axvspan(9, 32, facecolor="yellow", alpha=0.4) # helix
axvspan(36, 61, facecolor="yellow", alpha=0.4)
axvspan(64, 71, facecolor="red", alpha=0.4) # sheet
axvspan(72, 79, facecolor="red", alpha=0.4) # sheet
axvspan(82, 100, facecolor="yellow", alpha=0.4)
axvspan(104, 126, facecolor="yellow", alpha=0.4)
axvspan(130, 159, facecolor="yellow", alpha=0.4)
axvspan(164, 190, facecolor="yellow", alpha=0.4)
axvspan(200, 224, facecolor="yellow", alpha=0.4)
# Show exactly the length of the sequence
axis(xmin = 1, xmax = num_residues)
xlabel("residue number")
ylabel("hydrophobicity (triangle average over %d values)" % window_size)
record_id = record.title.split(" ", 1)[0]
title("K&D hydrophobicity for " + record_id)
show()