#!/usr/bin/env python """ REQUIRED INSTALLATIONS: -Python 2.5 -Scipy 0.5.2 -NumPy 1.0.1 -Biopython 1.42 -Numerical 24.2 (install through Biopython website) -mxTextTools: http://www.egenix.com/files/python/mxTextTools.html INPUT REQUIREMENTS: -2 PDB files PDB file must be in correct PDB format (i.e. chain ID's present, unique atom name within each residue, occupancy, etc...) Local_wRMSD.py INFORMATION NOTE: Scaling factor is set to 2 for local alignments. TO RUN Local_wRMSD.py: Local_wRMSD.py Protein_1.pdb Protein_1_Chain_ID Protein_2.pdb Protein_2_Chain_ID Example: Local_wRMSD.py 3ERD.pdb A 3ERT.pdb A Example output: Unique weighted solutions with a corresponding %wSUM To run through cygwin: In c:cygwin\etc\profile Change PATH to local python (Python25): PATH=/usr/local/bin:/cygdrive/c/Python25:/usr/bin:/bin:/usr/X11R6/bin:$PATH export PATH For questions or comments: Kelly Damm kdamm@umich.edu University of Michigan Carlson Lab """ #define global functions from __future__ import division import sys,re,cgi,os from scipy import sort,transpose import Numeric, LinearAlgebra from Numeric import * def run_Local_wRMSD(file1,file2,X_Chain_ID,Y_Chain_ID): #COMPARE RESIDUES FROM PROTEIN X AND PROTEIN Y BY CHAINS, REMOVES NONMATCHING RESIDUE COORDINATES AND RETURNS CA COORDINATES FROM MATCHING RESIDUES xlist,ylist = Ensure_Correspondence(X_Chain_ID,Y_Chain_ID,file1,file2) x = transpose(xlist) y = transpose(ylist) #RETURNS ALL COORDINATES OF PROTEIN X FOR TRANSFORMATION all = getAll_Coords(file1) title=(file1.split('.')[0], file1) #PERFORM STANDARD AND WEIGHTED RMSD CALCULATION final_wSUM,All_final_Coords = weighted_alignment(x,y,all,2) #DETERMINE UNIQUE LOCAL SOLUTIONS all_answer = Unique_Coords(All_final_Coords) s = getStructure(file1) all_sorted_wSUM = Coord_Positions(all_answer,All_final_Coords,s,final_wSUM,title) rounded_wSUM = round_decimal(all_sorted_wSUM) solns = percent(rounded_wSUM) print_pos_soln(solns,title) #ENSURE RESIDUE CORRESPONDENCE def Ensure_Correspondence(X_Chain_ID,Y_Chain_ID,file1,file2): xlist =[] ylist =[] x_CAResIDs= getCA_Resseq(file1, X_Chain_ID) y_CAResIDs= getCA_Resseq(file2, Y_Chain_ID) #REMOVE DISORDERED RESIDUES x_disordered = get_Disordered(file1,X_Chain_ID) y_disordered = get_Disordered(file2,Y_Chain_ID) xremove_disorder,x_ResSeq1 = remove_ResID(x_CAResIDs,x_disordered) yremove_disorder,y_ResSeq1 = remove_ResID(y_CAResIDs,y_disordered) #COMPARE PROTEIN1 TO PROTEIN2 AND REMOVE NONMATCHING RESIDUES x_Nonmatch, y_Nonmatch = compare(x_ResSeq1,y_ResSeq1) #MAKE LIST OF ALL RESIDUES TO BE REMOVED x_Remove = x_disordered + x_Nonmatch x_Remove.sort() y_Remove = y_disordered + y_Nonmatch y_Remove.sort() #DETERMINE POSITION OF RESIDUES TO BE REMOVED IN PDB FILES x_Res_positions = get_Residue_Position(x_CAResIDs, x_Remove) y_Res_positions = get_Residue_Position(y_CAResIDs, y_Remove) #RETURNS ALL CA COORDINATES x_CAcoords = transpose(get_CAcoords(file1,X_Chain_ID)) y_CAcoords = transpose(get_CAcoords(file2,Y_Chain_ID)) #Removes CA coordinates of nonmatching residues xlist = zero_CAcoords(x_CAcoords,x_Res_positions) ylist = zero_CAcoords(y_CAcoords,y_Res_positions) #FINAL CHECK TO ENSURE RESIDUE CORRESPONDENCE n = len(xlist) j = len(ylist) if n != j: sys.exit("Proteins do not have same number of atoms; Protein X has",n,"atoms, while Protein Y has",j,"atoms") #CHECK TO DETERMINE IF APPROPRIATE NUMBER OF COORDINATES PRESENT if (len(xlist[0]) != 3): sys.exit("Protein X does not have a 3xn atom coordinate set") if (len(ylist[0]) != 3): sys.exit("Protein Y does not have a 3xn atom coordinate set") #CHECK TO DETERMINE IF >4 COORDINATES PRESENT FOR EACH PROTEIN if n < 4: sys.exit("Protein X has 3 or less coordinates, 4 or more needed to perform alignment") if j < 4: sys.exit("Protein Y has 3 or less coordinates, 4 or more needed to perform alignment") return xlist,ylist ##WEIGHTED RMSD ALIGNMENT## def weighted_alignment(x,y,all,scaling_factor): All_final_Coords = [] #TOTAL NUMBER OF ATOMS atoms = len(x[0]) residue = int(atoms * (.10)) final_wSUM = [] #10 LOCAL STANDARD ALIGNMENTS (t = counter, goes through 10 iterations; begin = first residue in local section; end = last residue in local section) t = 1 begin = 0 end = 10 while t < 11: x_domain = [coords[begin:end] for coords in x] y_domain = [coords[begin:end] for coords in y] n = len(x_domain[0]) #TRANSLATE PROTEINS X AND Y TO CENTER x_mean = mean(x_domain,n) y_mean = mean(y_domain,n) x_trans = translation(x_domain, x_mean) x_translated = nested_list(x_trans,n) y_trans = translation(y_domain, y_mean) y_translated = nested_list(y_trans,n) x_transpose = transpose(x_translated) #CALCULATE COVARIANCE MATRIX (y_translated *x_translated^t) R = matrixmultiply(y_translated, x_transpose) R_transpose = transpose(R) R2 = matrixmultiply(R_transpose, R) #DETERMINE THE EIGENVECTORS AND EIGENVALUES of R2 mu,A = LinearAlgebra.eigenvectors(R2) #SORT EIGENVECTORS IN DECREASING ORDER OF EIGENVALUES a = [(mu[i],A[i]) for i in range(len(A))] a.sort() a.reverse() mu = [s[0] for s in a] A = [s[1] for s in a] #DETERMINE RIGHT-HANDED SYSTEM A_3 = crossproduct(A[0], A[1]) A = [A[0], A[1], A_3] #CALCULATE B, NORMALIZED PRODUCT OF (RxA) B_1 = matrixmultiply(R, A[0]) B_2 = matrixmultiply(R, A[1]) norm_B_1 = normalize(B_1) norm_B_2 = normalize(B_2) norm_B_3 = crossproduct(norm_B_1,norm_B_2) B = [norm_B_1,norm_B_2,norm_B_3] B_transpose = transpose(B) #CALCULATE ROTATION MATRIX, U U = rotation_matrix(B_transpose, A) #TRANSLATE ALL COORDINATES OF PROTEIN X s_all_trans = translation(all, x_mean) j = len(all[0]) s_all_translated = nested_list(s_all_trans,j) s_all_rot = matrixmultiply(U,s_all_translated) #ADD MEAN VALUES OF PROTEIN Y TO ROTATED COORDINATES OF PROTEIN X s_all_coords = add_coords(s_all_rot, y_mean) s_all2 = nested_list(s_all_coords,j) s_allrot = transpose(s_all2) #TRANSFORM CA COORDINATES ONLY g = len(x[0]) s_x_ca_trans = translation(x, x_mean) s_x_ca_translated = nested_list(s_x_ca_trans,g) s_x_ca_rot = matrixmultiply(U,s_x_ca_translated) s_x_ca_coords = add_coords(s_x_ca_rot, y_mean) xt = nested_list(s_x_ca_coords,g) S_RMSD = sRMSD(xt, y) #AFTER LOCAL STANDARD ALIGNMENT, WEIGHTED ALIGNMENT PERFORMED USING ENTIRE PROTEIN #z = number of iterations z = 1 weighted_rmsds = [] w_metric = [] all_list = [] #DETERMINE APPROPRIATE SCALING FACTOR while z < 501: p = len(xt[0]) #TRANSLATE WEIGHTED CENTROIDS TO ORIGIN #CALCULATE WEIGHTS (protein1, protein2, scaling_factor) weights = weight(xt,y,scaling_factor) weighted_x_mean = weight_trans(xt,weights,p) weighted_y_mean = weight_trans(y,weights,p) #USING NEW WEIGHTED TRANSLATION w_x_trans = translation(xt, weighted_x_mean) w_y_trans = translation(y, weighted_y_mean) w_x_translated = nested_list(w_x_trans,p) w_y_translated = nested_list(w_y_trans,p) #CALCULATE WEIGHTED COVARIENCE MATRIX weighted_rot = weight(w_x_translated,w_y_translated,scaling_factor) weighted_x_translated = multiply(weighted_rot,w_x_translated) wx_transpose = transpose(weighted_x_translated) R = matrixmultiply(w_y_translated,wx_transpose) R_transpose = transpose(R) R2 = matrixmultiply(R_transpose,R) #DETERMINE THE EIGENVECTORS AND EIGENVALUES of R2 mu,A = LinearAlgebra.eigenvectors(R2) #SORT EIGENVECTORS IN DECREASING ORDER OF EIGENVALUES a = [(mu[k],A[k]) for k in range(len(A))] a.sort() a.reverse() mu = [b[0] for b in a] A = [b[1] for b in a] #DETERMINE RIGHT-HANDED SYSTEM A_3 = crossproduct(A[0], A[1]) A = [A[0], A[1], A_3] #CALCULATE B, NORMALIZED PRODUCT OF (RxA) B_1 = matrixmultiply(R, A[0]) B_2 = matrixmultiply(R, A[1]) norm_B_1 = normalize(B_1) norm_B_2 = normalize(B_2) norm_B_3 = crossproduct(norm_B_1,norm_B_2) B = [norm_B_1,norm_B_2,norm_B_3] B_transpose = transpose(B) #CALCULATE WEIGHTED ROTATION MATRIX, U U = rotation_matrix(B_transpose, A) w_x_rot = matrixmultiply(U,w_x_translated) #CALCULATE WEIGHTED RMSD weighted_rmsds.append(wRMSD(w_x_rot, w_y_translated,scaling_factor)) #CALCULATE %wSUM w_metric.append(wSUM(w_x_rot, w_y_translated,scaling_factor,p)) #DETERMINE IF CONVERGENCE IS REACHED if z > 1: wRMSD_diff = w_metric[-2] - w_metric[-1] else: wRMSD_diff = [] if 0 < wRMSD_diff < 0.000001: #TRANSFORM ALL COORDINATES OF PROTEIN X w_all_trans = translation(s_all2, weighted_x_mean) i = len(all[0]) w_all_translated = nested_list(w_all_trans,i) w_all_rot = matrixmultiply(U,w_all_translated) #ADD ALL MEAN VALUES OF PROTEIN Y TO ALL ROTATED COORDINATES OF PROTEIN X w_all_coords = add_coords(w_all_rot, weighted_y_mean) w_all = nested_list(w_all_coords,i) w_allrot = transpose(w_all) All_final_Coords.append(w_allrot) iters = z - 1 #WRITE OUT WEIGHTED RMSD SOLUTION final_wSUM.append(w_metric[-1]) begin = begin + residue end = end + residue t = t + 1 break else: #ADD MEAN VALUES OF PROTEIN Y TO ROTATED COORDINATES OF PROTEIN X w_x_coords = add_coords(w_x_rot, weighted_y_mean) xt = nested_list(w_x_coords,p) #TRANSFORM ALL COORDINATES OF PROTEIN X all_trans = translation(s_all2, weighted_x_mean) i = len(all[0]) all_translated = nested_list(all_trans,i) all_rot = matrixmultiply(U,all_translated) #ADD ALL MEAN VALUES OF PROTEIN Y TO ALL ROTATED COORDINATES OF PROTEIN X all_coords = add_coords(all_rot, weighted_y_mean) s_all2 = nested_list(all_coords,i) z = z + 1 else: #print "Solution",t,":Alignment stopped after 1000 iterations, convergence was never reached" allrot_nocovg = transpose(s_all2) All_final_Coords.append(allrot_nocovg) iters = z - 1 #WRITE OUT WEIGHTED RMSD SOLUTION final_wSUM.append(w_metric[-1]) begin = begin + residue end = end + residue t = t + 1 else: end return final_wSUM,All_final_Coords #####HELPER FUNCTIONS##### def getCA_Resseq(filename,chain_key): x =[] parser=PDBParser() structure=parser.get_structure(filename.split('.')[0], filename) for model in structure.get_list(): chain_A = model[chain_key] for residue in chain_A.get_list(): if residue.has_id("CA"): resseq=residue.get_id()[1] x.append(resseq) return x def get_Disordered(filename,chain_key): x =[] parser=PDBParser() structure=parser.get_structure(filename.split('.')[0], filename) for model in structure.get_list(): chain_A = model[chain_key] for residue in chain_A.get_list(): if residue.is_disordered(): resseq = residue.get_id()[1] x.append(resseq) return x def remove_ResID(filename,list): result=[] result1=[] for value in filename: if value in list: result.append(value) else: result1.append(value) return result,result1 def compare(x,y): x_list = [] y_list = [] for each in x: if each not in y: x_list.append(each) for each in y: if each not in x: y_list.append(each) x_list = zero2(x_list) y_list = zero2(y_list) x_list = sort(x_list) y_list = sort(y_list) x_list = list(x_list) y_list = list(y_list) return x_list, y_list def zero2(x): result = [] for each in x: if each != 0: result.append(each) else: continue return result def get_Residue_Position(first,list): c = 0 q = len(list) result2 = [] while c < q: result = [] for each in first: result.append(each) if each != list[c]: continue else: break ans = len(result) ans = ans - 1 result2.append(ans) c = c + 1 return result2 def get_CAcoords(filename,chain_key): x =[] parser=PDBParser() structure=parser.get_structure(filename.split('.')[0], filename) for model in structure.get_list(): chain_A = model[chain_key] for residue in chain_A.get_list(): if residue.has_id("CA"): ca=residue["CA"] x.append(ca.get_coord()) x_t = transpose(x) return x_t ###THANK YOU TO MARK BENSON FOR UPDATING THIS HELPER FUNCTION### def zero_CAcoords(first,list): c = 0 q = len(list) r = len(first) s = r - q result = array([[s]]) import copy temp = copy.copy(first) if q != 0: result = copy.copy(first) for i in range(len(first)) : if i not in list : if i + c < r : temp[i] = first[i+c] else: c = c + 1 result[i] = [0,0,0] if i + c < r : temp[i] = first[i+c] a = temp[:s] return a else: return first def getAll_Coords(filename): result =[] parser=PDBParser() structure=parser.get_structure(filename.split('.')[0], filename) for model in structure.get_list(): chain = model.get_list() for each in chain: for res in each.get_list(): for x in res.get_list(): result.append(x.get_coord()) x_t = transpose(result) return x_t def getStructure(filename): parser = PDBParser() structure = parser.get_structure(filename.split('.')[0], filename) return structure def writeStructure(structure,filename): from Bio.PDB.PDBIO import PDBIO import sys io = PDBIO() io.set_structure(structure) io.save(filename) def setAll(structure,newCACoords): allCAs =[] for model in structure.get_list(): chain = model.get_list() for each in chain: for res in each.get_list(): for x in res.get_list(): allCAs.append(x) if len(allCAs) != len(newCACoords): print "wrong number of atoms .. structure had",len(allCAs),"you gave me",len(newCACoords) raise Exception("wrong number of atoms") for newCoords,ca in zip(newCACoords,allCAs): #print newCoords,ca.get_coord() ca.set_coord(newCoords) #print ca.get_coord() #raise Exception("time to stop") def mean(first,n): return [sum(each)/n for each in first] def translation(first,second): c = 0 k = [] q = len(first) while c < q: for each in first[c]: subtr = each - second[c] k.append(subtr) c = c + 1 return k def nested_list(name,n): first1 = name[0:n] first2 = name[n:2*n] first3 = name[2*n:3*n] first_translated =[first1,first2,first3] return first_translated def crossproduct(a,b): C_0 = a[1]*b[2] - a[2]*b[1] C_1 = a[2]*b[0] - a[0]*b[2] C_2 = a[0]*b[1] - a[1]*b[0] return [C_0, C_1, C_2] def normalize(a): B_0 = (a[0])/(((a[0]**2)+(a[1]**2)+((a[2])**2))**(1/2)) B_1 = (a[1])/(((a[0]**2)+(a[1]**2)+((a[2])**2))**(1/2)) B_2 = (a[2])/(((a[0]**2)+(a[1]**2)+((a[2])**2))**(1/2)) return [B_0, B_1, B_2] def rotation_matrix(first, second): U = matrixmultiply(first, second) return U def sqr(matrix): k = [] for each in matrix: sq = (each)**2 k.append(sq) return k def sqroot(matrix): j = [] for each in matrix: sqroot = sqrt(each) j.append(sqroot) return j def sRMSD(first,second): first = array(first) second = array(second) subtr = first - second def sqr(matrix): k = [] for each in matrix: sq = (each)**2 k.append(sq) return k subtr_s = sqr(subtr) sum_subtr_s = sum(subtr_s) def sqroot(matrix): j = [] for each in matrix: sqroot = sqrt(each) j.append(sqroot) return j d = sqroot(sum_subtr_s) sq_d = sqr(d) s_sq_d = sum(sq_d) tot = (len(first[0])) value = sqrt(s_sq_d/tot) return value def add_coords(first,second): c = 0 k = [] q = len(first) while c < q: for each in first[c]: add = each + second[c] k.append(add) c = c + 1 return k def weight_trans(first,weight,n): mult = multiply(first, weight) sum_mult = sum(mult) mean = [sum(each)/n for each in mult] return mean def weight(first,second,constant): first = array(first) second = array(second) subtr = first - second subtr_s = sqr(subtr) sum_subtr_s = sum(subtr_s) d = sqroot(sum_subtr_s) weighted_d = Gaussian(d,constant) weighted_d = Gaussian2(weighted_d) return weighted_d def wSUM(first,second,constant,atoms): first = array(first) second = array(second) subtr = first - second subtr_s = sqr(subtr) sum_subtr_s = sum(subtr_s) d = sqroot(sum_subtr_s) weighted_d = Gaussian(d,constant) weighted_d = Gaussian2(weighted_d) sum_weighted_d = sum(weighted_d) value = sum_weighted_d/atoms return value def wRMSD(first,second,constant): first = array(first) second = array(second) subtr = first - second subtr_s = sqr(subtr) sum_subtr_s = sum(subtr_s) d = sqroot(sum_subtr_s) weighted_d = Gaussian(d,constant) weights = Gaussian2(weighted_d) sq_d = sqr(d) wd = multiply(sq_d,weights) s_wd = sum(wd) n = len(d) s_sq_d_divide = s_wd/n value = sqrt(s_sq_d_divide) return value def Gaussian(first,z): value = [] for each in first: weight = (-((each)**2)/z) value.append(weight) return value def Gaussian2(first): value = [] for each in first: weight = exp(each) value.append(weight) return value def Compare_Solns(file,t): list = [t + 1] c = t + 1 while c < 10: SRMSD = sRMSD(transpose(file[c]),transpose(file[t])) c = c + 1 if SRMSD < 0.5: list.append(c) return list def Unique_Coords(file): t = 0 all_answer = [] while t < 9: if t == 0: answer = Compare_Solns(file,t) all_answer.append(answer) else: for each in all_answer: if t + 1 not in each: answer = Compare_Solns(file,t) else: answer = [0] break all_answer.append(answer) t = t + 1 for each in all_answer: if 10 not in each: all_answer.append([10]) break return all_answer def round_decimal(file): rounded_wSUM = [] for each in file: rounded_wSUM.append(round(each,4)) return rounded_wSUM def percent(file): catch = [] for each in file: a = each * 100 catch.append(a) return catch def Coord_Positions(file,coords,s,final_wSUM,title): new_list = [] all_wSUM = [] ALL_first = [] all_soln_coords = [] for each in file: if each != [0]: new_list.append(each) for num in new_list: first = num[0] ALL_first.append(first) for value in ALL_first: wSUM = final_wSUM[value -1] all_wSUM.append(wSUM) soln_coords = (coords[value - 1]) all_soln_coords.append(soln_coords) #Sort coords in order of %wSUM a = [(all_wSUM[k],all_soln_coords[k]) for k in range(len(all_soln_coords))] a.sort() a.reverse() all_wSUM = [b[0] for b in a] all_soln_coords = [b[1] for b in a] count = 1 #Write out weighted solns for sorted_coords in all_soln_coords: setAll(s,sorted_coords) writeStructure(s,'%s_wRMSD_%s.pdb'%(title[0],count)) count = count + 1 return all_wSUM def print_pos_soln(file1,title): n = len(file1) c = 0 while c < n: print title[0],"wRMSD",c+1,"corresponds to %wSUM of",file1[c],"%" c = c + 1 ##### HELPER FUNCTIONS IN ENSURE RESIDUE CORRESPONDENCE FUNCTION ##### ##### THIS HAS BEEN MODIFIED TO WORK WITH WRMSD CODE ##### # Copyright (C) 2002, Thomas Hamelryck (thamelry@vub.ac.be) # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. # Python stuff import sys from string import split from Numeric import array, Float0 # My stuff from Bio.PDB.StructureBuilder import StructureBuilder from Bio.PDB.PDBExceptions import PDBConstructionException from Bio.PDB.parse_pdb_header import _parse_pdb_header_list __doc__="Parser for PDB files." # If PDB spec says "COLUMNS 18-20" this means line[17:20] class PDBParser: """ Parse a PDB file and return a Structure object. """ def __init__(self, PERMISSIVE=0, get_header=0, structure_builder=None): """ The PDB parser call a number of standard methods in an aggregated StructureBuilder object. Normally this object is instanciated by the PDBParser object itself, but if the user provides his own StructureBuilder object, the latter is used instead. Arguments: o PERMISSIVE - int, if this is 0 exceptions in constructing the SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are caught, but some residues or atoms will be missing. THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!. o structure_builder - an optional user implemented StructureBuilder class. """ if structure_builder!=None: self.structure_builder=structure_builder else: self.structure_builder=StructureBuilder() self.header=None self.trailer=None self.line_counter=0 self.PERMISSIVE=PERMISSIVE # Public methods def get_structure(self, id, file): """Return the structure. Arguments: o id - string, the id that will be used for the structure o file - name of the PDB file OR an open filehandle """ self.header=None self.trailer=None # Make a StructureBuilder instance (pass id of structure as parameter) self.structure_builder.init_structure(id) if isinstance(file, basestring): file=open(file) self._parse(file.readlines()) file.close() self.structure_builder.set_header(self.header) # Return the Structure instance return self.structure_builder.get_structure() def get_header(self): "Return the header." return self.header def get_trailer(self): "Return the trailer." return self.trailer # Private methods def _parse(self, header_coords_trailer): "Parse the PDB file." # Extract the header; return the rest of the file self.header, coords_trailer=self._get_header(header_coords_trailer) # Parse the atomic data; return the PDB file trailer self.trailer=self._parse_coordinates(coords_trailer) def _get_header(self, header_coords_trailer): "Get the header of the PDB file, return the rest." structure_builder=self.structure_builder for i in range(0, len(header_coords_trailer)): structure_builder.set_line_counter(i+1) line=header_coords_trailer[i] record_type=line[0:6] if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '): break header=header_coords_trailer[0:i] # Return the rest of the coords+trailer for further processing self.line_counter=i coords_trailer=header_coords_trailer[i:] header_dict=_parse_pdb_header_list(header) return header_dict, coords_trailer def _parse_coordinates(self, coords_trailer): "Parse the atomic data in the PDB file." local_line_counter=0 structure_builder=self.structure_builder current_model_id=0 # Flag we have an open model model_open=0 current_chain_id=None current_segid=None current_residue_id=None current_resname=None for i in range(0, len(coords_trailer)): line=coords_trailer[i] record_type=line[0:6] global_line_counter=self.line_counter+local_line_counter+1 structure_builder.set_line_counter(global_line_counter) if(record_type=='ATOM ' or record_type=='HETATM'): # Initialize the Model - there was no explicit MODEL record if not model_open: structure_builder.init_model(current_model_id) current_model_id+=1 model_open=1 fullname=line[12:16] # get rid of whitespace in atom names split_list=split(fullname) if len(split_list)!=1: # atom name has internal spaces, e.g. " N B ", so # we do not strip spaces name=fullname else: # atom name is like " CA ", so we can strip spaces name=split_list[0] altloc=line[16:17] resname=line[17:20] chainid=line[21:22] try: serial_number=int(line[6:11]) except: serial_number=0 resseq=int(split(line[22:26])[0]) # sequence identifier icode=line[26:27] # insertion code if record_type=='HETATM': # hetero atom flag if resname=="HOH" or resname=="WAT": hetero_flag="W" else: hetero_flag="H" else: hetero_flag=" " residue_id=(hetero_flag, resseq, icode) # atomic coordinates x=float(line[30:38]) y=float(line[38:46]) z=float(line[46:54]) coord=array((x, y, z), Float0) # occupancy & B factor occupancy=float(line[54:60]) bfactor=float(line[60:66]) segid=line[72:76] if current_segid!=segid: current_segid=segid structure_builder.init_seg(current_segid) if current_chain_id!=chainid: current_chain_id=chainid structure_builder.init_chain(current_chain_id) current_residue_id=residue_id current_resname=resname try: structure_builder.init_residue(resname, hetero_flag, resseq, icode) except PDBConstructionException, message: self._handle_PDB_exception(message, global_line_counter) elif current_residue_id!=residue_id or current_resname!=resname: current_residue_id=residue_id current_resname=resname try: structure_builder.init_residue(resname, hetero_flag, resseq, icode) except PDBConstructionException, message: self._handle_PDB_exception(message, global_line_counter) # init atom try: structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number) except PDBConstructionException, message: self._handle_PDB_exception(message, global_line_counter) elif(record_type=='ANISOU'): anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) # U's are scaled by 10^4 anisou_array=(array(anisou, Float0)/10000.0).astype(Float0) structure_builder.set_anisou(anisou_array) elif(record_type=='MODEL '): structure_builder.init_model(current_model_id) current_model_id+=1 model_open=1 current_chain_id=None current_residue_id=None elif(record_type=='END ' or record_type=='CONECT'): # End of atomic data, return the trailer self.line_counter=self.line_counter+local_line_counter return coords_trailer[local_line_counter:] elif(record_type=='ENDMDL'): model_open=0 current_chain_id=None current_residue_id=None elif(record_type=='SIGUIJ'): # standard deviation of anisotropic B factor siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) # U sigma's are scaled by 10^4 siguij_array=(array(siguij, Float0)/10000.0).astype(Float0) structure_builder.set_siguij(siguij_array) elif(record_type=='SIGATM'): # standard deviation of atomic positions sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66])) sigatm_array=array(sigatm, Float0) structure_builder.set_sigatm(sigatm_array) local_line_counter=local_line_counter+1 # EOF (does not end in END or CONECT) self.line_counter=self.line_counter+local_line_counter return [] def _handle_PDB_exception(self, message, line_counter): """ This method catches an exception that occurs in the StructureBuilder object (if PERMISSIVE==1), or raises it again, this time adding the PDB line number to the error message. """ message="%s at line %i." % (message, line_counter) if self.PERMISSIVE: # just print a warning - some residues/atoms will be missing print "PDBConstructionException: %s" % message print "Exception ignored.\nSome atoms or residues will be missing in the data structure." else: # exceptions are fatal - raise again with new message (including line nr) raise PDBConstructionException, message if __name__=="__main__": import sys p=PDBParser(PERMISSIVE=1) s=p.get_structure("scr", sys.argv[1]) for m in s.get_iterator(): p=m.get_parent() assert(p is s) for c in m.get_iterator(): p=c.get_parent() assert(p is m) for r in c.get_iterator(): p=r.get_parent() assert(p is c) for a in r.get_iterator(): p=a.get_parent() if not p is r: print p, r #RUN Local_wRMSD.py if __name__ == "__main__": if len(sys.argv) != 5: print "usage: Local_wRMSD.py Protein_X.pdb Protein_X_ChainID Protein_Y.pdb Protein_Y_ChainID" sys.exit() filename1 = sys.argv[1] filename2 = sys.argv[3] X_Chain_ID = sys.argv[2] Y_Chain_ID = sys.argv[4] run_Local_wRMSD(filename1,filename2,X_Chain_ID,Y_Chain_ID)