## graph10.py # Plot weight hydrophobity values for a sequence # Use a triangle average of width 19 # The bR structure is known so highlight the secondary structure features; from # http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&val=66360541 import fasta_reader from pylab import * # Kyte & Doolittle index of hydrophobicity kd = { 'A': 1.8,'R':-4.5,'N':-3.5,'D':-3.5,'C': 2.5, 'Q':-3.5,'E':-3.5,'G':-0.4,'H':-3.2,'I': 4.5, 'L': 3.8,'K':-3.9,'M': 1.9,'F': 2.8,'P':-1.6, 'S':-0.8,'T':-0.7,'W':-0.9,'Y':-1.3,'V': 4.2 } infile = open("bacteriorhodopsin.fasta") record = fasta_reader.read_fasta_record(infile) num_residues = len(record.sequence) values = [] for residue in record.sequence: values.append(kd[residue]) # I've computed the values for each residue. Now compute the moving # average for a window size of 19. Transmembrane helixes are often # about 20 residues in length so peaks in this plot suggest # transmembrane regions. window_size = 19 half_window = (window_size-1)/2 # Precompute the offsets list for better performance. offsets = range(-half_window, half_window+1) weights = [1,2,3,4,5,6,7,8,9,10,9,8,7,6,5,4,3,2,1] total_weight = sum(weights) y_data = [] for i in range(half_window, num_residues-half_window): weighted_value = 0.0 for offset, weight in zip(offsets, weights): weighted_value += weight*values[i+offset] y_data.append(weighted_value / total_weight) # Exclude the first and last residues. # The final list is in biologist coordinates (the first residue is # index 1) x_data = range(half_window, half_window+len(y_data)) plot(x_data, y_data, linewidth=1.0) # Draw a reasonable cutoff for membrane prediction # Value of 1.6 taken from # http://arbl.cvmbs.colostate.edu/molkit/hydropathy/ axhline(y=1.6) # Draw the known helix and sheet ranges axvspan(9, 32, facecolor="yellow", alpha=0.4) # helix axvspan(36, 61, facecolor="yellow", alpha=0.4) axvspan(64, 71, facecolor="red", alpha=0.4) # sheet axvspan(72, 79, facecolor="red", alpha=0.4) # sheet axvspan(82, 100, facecolor="yellow", alpha=0.4) axvspan(104, 126, facecolor="yellow", alpha=0.4) axvspan(130, 159, facecolor="yellow", alpha=0.4) axvspan(164, 190, facecolor="yellow", alpha=0.4) axvspan(200, 224, facecolor="yellow", alpha=0.4) # Show exactly the length of the sequence axis(xmin = 1, xmax = num_residues) xlabel("residue number") ylabel("hydrophobicity (triangle average over %d values)" % window_size) record_id = record.title.split(" ", 1)[0] title("K&D hydrophobicity for " + record_id) show()