#!/usr/bin/env python

""" REQUIRED INSTALLATIONS:
	-Python 2.5
	-Scipy 0.5.2
	-NumPy 1.0.1
	-Biopython 1.42
	-Numerical 24.2 (install through Biopython website)
	-mxTextTools: http://www.egenix.com/files/python/mxTextTools.html

INPUT REQUIREMENTS:
	-2 PDB files
		PDB file must be in correct PDB format (i.e. chain ID's present, unique atom name within each residue, occupancy, etc...)

Local_wRMSD.py INFORMATION

	NOTE: Scaling factor is set to 2 for local alignments.

	TO RUN Local_wRMSD.py:

		Local_wRMSD.py Protein_1.pdb Protein_1_Chain_ID Protein_2.pdb Protein_2_Chain_ID

		Example:
		Local_wRMSD.py 3ERD.pdb A 3ERT.pdb A

		Example output:	
		Unique weighted solutions with a corresponding %wSUM


To run through cygwin:
	In c:cygwin\etc\profile
	
	Change PATH to local python (Python25):

	PATH=/usr/local/bin:/cygdrive/c/Python25:/usr/bin:/bin:/usr/X11R6/bin:$PATH
	export PATH

For questions or comments:
Kelly Damm
kdamm@umich.edu

University of Michigan
Carlson Lab """


#define global functions
from __future__ import division
import sys,re,cgi,os
from scipy import sort,transpose
import Numeric, LinearAlgebra
from Numeric import *

def run_Local_wRMSD(file1,file2,X_Chain_ID,Y_Chain_ID):
	#COMPARE RESIDUES FROM PROTEIN X AND PROTEIN Y BY CHAINS, REMOVES NONMATCHING RESIDUE COORDINATES AND RETURNS CA COORDINATES FROM MATCHING RESIDUES
	xlist,ylist = Ensure_Correspondence(X_Chain_ID,Y_Chain_ID,file1,file2)
	x = transpose(xlist)
	y = transpose(ylist)

	#RETURNS ALL COORDINATES OF PROTEIN X FOR TRANSFORMATION
	all = getAll_Coords(file1)
	title=(file1.split('.')[0], file1)

	#PERFORM STANDARD AND WEIGHTED RMSD CALCULATION
	final_wSUM,All_final_Coords = weighted_alignment(x,y,all,2)

	#DETERMINE UNIQUE LOCAL SOLUTIONS
	all_answer = Unique_Coords(All_final_Coords)
	s = getStructure(file1)	
	all_sorted_wSUM = Coord_Positions(all_answer,All_final_Coords,s,final_wSUM,title)
	rounded_wSUM = round_decimal(all_sorted_wSUM)
	solns = percent(rounded_wSUM)
	print_pos_soln(solns,title)


#ENSURE RESIDUE CORRESPONDENCE
def Ensure_Correspondence(X_Chain_ID,Y_Chain_ID,file1,file2):
	xlist =[]
	ylist =[]	
	x_CAResIDs= getCA_Resseq(file1, X_Chain_ID)
	y_CAResIDs= getCA_Resseq(file2, Y_Chain_ID)	

	#REMOVE DISORDERED RESIDUES
	x_disordered = get_Disordered(file1,X_Chain_ID)
	y_disordered = get_Disordered(file2,Y_Chain_ID)

	xremove_disorder,x_ResSeq1 = remove_ResID(x_CAResIDs,x_disordered)	
	yremove_disorder,y_ResSeq1 = remove_ResID(y_CAResIDs,y_disordered)

	#COMPARE PROTEIN1 TO PROTEIN2 AND REMOVE NONMATCHING RESIDUES
	x_Nonmatch, y_Nonmatch = compare(x_ResSeq1,y_ResSeq1)

	#MAKE LIST OF ALL RESIDUES TO BE REMOVED
	x_Remove = x_disordered + x_Nonmatch
	x_Remove.sort()
	y_Remove = y_disordered + y_Nonmatch
	y_Remove.sort()

	#DETERMINE POSITION OF RESIDUES TO BE REMOVED IN PDB FILES
	x_Res_positions = get_Residue_Position(x_CAResIDs, x_Remove)
	y_Res_positions = get_Residue_Position(y_CAResIDs, y_Remove)

	#RETURNS ALL CA COORDINATES
	x_CAcoords = transpose(get_CAcoords(file1,X_Chain_ID))
	y_CAcoords = transpose(get_CAcoords(file2,Y_Chain_ID))

	#Removes CA coordinates of nonmatching residues
	xlist = zero_CAcoords(x_CAcoords,x_Res_positions)
	ylist = zero_CAcoords(y_CAcoords,y_Res_positions)

	#FINAL CHECK TO ENSURE RESIDUE CORRESPONDENCE
	n = len(xlist)
	j = len(ylist)
	if n != j:
		sys.exit("Proteins do not have same number of atoms; Protein X has",n,"atoms, while Protein Y has",j,"atoms")
		
	#CHECK TO DETERMINE IF APPROPRIATE NUMBER OF COORDINATES PRESENT
	if (len(xlist[0]) != 3): 
		sys.exit("Protein X does not have a 3xn atom coordinate set")
	if (len(ylist[0]) != 3):
		sys.exit("Protein Y does not have a 3xn atom coordinate set")

	#CHECK TO DETERMINE IF >4 COORDINATES PRESENT FOR EACH PROTEIN
	if n < 4:
		sys.exit("Protein X has 3 or less coordinates, 4 or more needed to perform alignment")
	if j < 4:
		sys.exit("Protein Y has 3 or less coordinates, 4 or more needed to perform alignment")
	return xlist,ylist


##WEIGHTED RMSD ALIGNMENT##
def weighted_alignment(x,y,all,scaling_factor):
	All_final_Coords = []
	
	#TOTAL NUMBER OF ATOMS
	atoms = len(x[0])
	residue = int(atoms * (.10))
	final_wSUM = [] 	

	#10 LOCAL STANDARD ALIGNMENTS (t = counter, goes through 10 iterations; begin = first residue in local section; end = last residue in local section)
	t = 1
	begin = 0
	end = 10
	while t < 11:
		x_domain = [coords[begin:end] for coords in x]
		y_domain = [coords[begin:end] for coords in y]
		n = len(x_domain[0])

	#TRANSLATE PROTEINS X AND Y TO CENTER
		x_mean = mean(x_domain,n)
		y_mean = mean(y_domain,n)
		x_trans = translation(x_domain, x_mean)
		x_translated = nested_list(x_trans,n)
		y_trans = translation(y_domain, y_mean)
		y_translated = nested_list(y_trans,n)
		x_transpose = transpose(x_translated)

		#CALCULATE COVARIANCE MATRIX (y_translated *x_translated^t)
		R = matrixmultiply(y_translated, x_transpose)
		R_transpose = transpose(R)
		R2 = matrixmultiply(R_transpose, R)

		#DETERMINE THE EIGENVECTORS AND EIGENVALUES of R2 
		mu,A = LinearAlgebra.eigenvectors(R2)

		#SORT EIGENVECTORS IN DECREASING ORDER OF EIGENVALUES
		a = [(mu[i],A[i]) for i in range(len(A))]
		a.sort()
		a.reverse()
		mu = [s[0] for s in a]
		A = [s[1] for s in a]

		#DETERMINE RIGHT-HANDED SYSTEM
		A_3 = crossproduct(A[0], A[1])
		A = [A[0], A[1], A_3]

		#CALCULATE B, NORMALIZED PRODUCT OF (RxA)
		B_1 = matrixmultiply(R, A[0])
		B_2 = matrixmultiply(R, A[1])
		norm_B_1 = normalize(B_1)
		norm_B_2 = normalize(B_2)
		norm_B_3 = crossproduct(norm_B_1,norm_B_2)
		B = [norm_B_1,norm_B_2,norm_B_3]
		B_transpose = transpose(B)

		#CALCULATE ROTATION MATRIX, U 
		U = rotation_matrix(B_transpose, A)

		#TRANSLATE ALL COORDINATES OF PROTEIN X
		s_all_trans = translation(all, x_mean)
		j = len(all[0])
		s_all_translated = nested_list(s_all_trans,j)
		s_all_rot = matrixmultiply(U,s_all_translated)
	
		#ADD MEAN VALUES OF PROTEIN Y TO ROTATED COORDINATES OF PROTEIN X
		s_all_coords = add_coords(s_all_rot, y_mean)
		s_all2 = nested_list(s_all_coords,j)
		s_allrot = transpose(s_all2)

		#TRANSFORM CA COORDINATES ONLY
		g = len(x[0])
		s_x_ca_trans = translation(x, x_mean)
		s_x_ca_translated = nested_list(s_x_ca_trans,g)
		s_x_ca_rot = matrixmultiply(U,s_x_ca_translated)
		s_x_ca_coords = add_coords(s_x_ca_rot, y_mean)
		xt = nested_list(s_x_ca_coords,g)
		S_RMSD = sRMSD(xt, y)

		#AFTER LOCAL STANDARD ALIGNMENT, WEIGHTED ALIGNMENT PERFORMED USING ENTIRE PROTEIN
		#z = number of iterations
		z = 1
		weighted_rmsds = []
		w_metric = []
		all_list = []

		#DETERMINE APPROPRIATE SCALING FACTOR
		while z < 501:
			p = len(xt[0])

			#TRANSLATE WEIGHTED CENTROIDS TO ORIGIN
			#CALCULATE WEIGHTS (protein1, protein2, scaling_factor)
			weights = weight(xt,y,scaling_factor)
			weighted_x_mean = weight_trans(xt,weights,p)
			weighted_y_mean = weight_trans(y,weights,p)

			#USING NEW WEIGHTED TRANSLATION
			w_x_trans = translation(xt, weighted_x_mean)
			w_y_trans = translation(y, weighted_y_mean)
			w_x_translated = nested_list(w_x_trans,p)
			w_y_translated = nested_list(w_y_trans,p)

			#CALCULATE WEIGHTED COVARIENCE MATRIX
			weighted_rot = weight(w_x_translated,w_y_translated,scaling_factor)
			weighted_x_translated = multiply(weighted_rot,w_x_translated)
			wx_transpose = transpose(weighted_x_translated)
			R = matrixmultiply(w_y_translated,wx_transpose)
			R_transpose = transpose(R)
			R2 = matrixmultiply(R_transpose,R)

			#DETERMINE THE EIGENVECTORS AND EIGENVALUES of R2 
			mu,A = LinearAlgebra.eigenvectors(R2)

			#SORT EIGENVECTORS IN DECREASING ORDER OF EIGENVALUES
			a = [(mu[k],A[k]) for k in range(len(A))]
			a.sort()
			a.reverse()
			mu = [b[0] for b in a]
			A = [b[1] for b in a]

			#DETERMINE RIGHT-HANDED SYSTEM
			A_3 = crossproduct(A[0], A[1])
			A = [A[0], A[1], A_3]

			#CALCULATE B, NORMALIZED PRODUCT OF (RxA)
			B_1 = matrixmultiply(R, A[0])
			B_2 = matrixmultiply(R, A[1])
			norm_B_1 = normalize(B_1)
			norm_B_2 = normalize(B_2)
			norm_B_3 = crossproduct(norm_B_1,norm_B_2)
			B = [norm_B_1,norm_B_2,norm_B_3]
			B_transpose = transpose(B)

			#CALCULATE WEIGHTED ROTATION MATRIX, U
			U = rotation_matrix(B_transpose, A)			
			w_x_rot = matrixmultiply(U,w_x_translated)

			#CALCULATE WEIGHTED RMSD
			weighted_rmsds.append(wRMSD(w_x_rot, w_y_translated,scaling_factor))

			#CALCULATE %wSUM
			w_metric.append(wSUM(w_x_rot, w_y_translated,scaling_factor,p))

			#DETERMINE IF CONVERGENCE IS REACHED
			if z > 1:
				wRMSD_diff = w_metric[-2] - w_metric[-1]

			else:
				wRMSD_diff = []

			if 0 < wRMSD_diff < 0.000001:
				#TRANSFORM ALL COORDINATES OF PROTEIN X
				w_all_trans = translation(s_all2, weighted_x_mean)
				i = len(all[0])
				w_all_translated = nested_list(w_all_trans,i)
				w_all_rot = matrixmultiply(U,w_all_translated)

				#ADD ALL MEAN VALUES OF PROTEIN Y TO ALL ROTATED COORDINATES OF PROTEIN X
				w_all_coords = add_coords(w_all_rot, weighted_y_mean)
				w_all = nested_list(w_all_coords,i)
				w_allrot = transpose(w_all)
				All_final_Coords.append(w_allrot)
				iters = z - 1

				#WRITE OUT WEIGHTED RMSD SOLUTION
				final_wSUM.append(w_metric[-1])
				begin = begin + residue
				end = end + residue
				t = t + 1
				break

			else:
				#ADD MEAN VALUES OF PROTEIN Y TO ROTATED COORDINATES OF PROTEIN X
				w_x_coords = add_coords(w_x_rot, weighted_y_mean)
				xt = nested_list(w_x_coords,p) 

				#TRANSFORM ALL COORDINATES OF PROTEIN X
				all_trans = translation(s_all2, weighted_x_mean)
				i = len(all[0])
				all_translated = nested_list(all_trans,i)
				all_rot = matrixmultiply(U,all_translated)

				#ADD ALL MEAN VALUES OF PROTEIN Y TO ALL ROTATED COORDINATES OF PROTEIN X
				all_coords = add_coords(all_rot, weighted_y_mean)
				s_all2 = nested_list(all_coords,i)
				z = z + 1
		else:	
			#print "Solution",t,":Alignment stopped after 1000 iterations, convergence was never reached"
			allrot_nocovg = transpose(s_all2)
			All_final_Coords.append(allrot_nocovg)
			iters = z - 1

			#WRITE OUT WEIGHTED RMSD SOLUTION
			final_wSUM.append(w_metric[-1])
			begin = begin + residue
			end = end + residue
			t = t + 1	
	else:
		end
	return final_wSUM,All_final_Coords


#####HELPER FUNCTIONS#####
def getCA_Resseq(filename,chain_key):
	x =[]
	parser=PDBParser()
	structure=parser.get_structure(filename.split('.')[0], filename)
	for model in structure.get_list():
		chain_A = model[chain_key]
		for residue in chain_A.get_list():
			if residue.has_id("CA"):
				resseq=residue.get_id()[1]
				x.append(resseq)
	return x	

def get_Disordered(filename,chain_key):
	x =[]
	parser=PDBParser()
	structure=parser.get_structure(filename.split('.')[0], filename)
	for model in structure.get_list():
		chain_A = model[chain_key]
		for residue in chain_A.get_list():
			if residue.is_disordered():
				resseq = residue.get_id()[1]
				x.append(resseq)
	return x

def remove_ResID(filename,list):
	result=[]
	result1=[]
	for value in filename:
		if value in list:
			result.append(value)
		else:
			result1.append(value)
	return result,result1

def compare(x,y):
	x_list = []
	y_list = []
    	for each in x:
    		if each not in y:
			x_list.append(each)
	for each in y:
        	if each not in x:
           		y_list.append(each)
	x_list = zero2(x_list)
	y_list = zero2(y_list)
	x_list = sort(x_list)
	y_list = sort(y_list)
	x_list = list(x_list)
	y_list = list(y_list)
	return x_list, y_list

def zero2(x):
	result = []
	for each in x:
		if each != 0:
			result.append(each)
		else:
			continue
	return result 

def get_Residue_Position(first,list):
	c = 0
	q = len(list)
	result2 = []
	while c < q:
		result = []
		for each in first:
			result.append(each)
			if each != list[c]:
				continue
			else:
				break
		ans = len(result)
		ans = ans - 1
		result2.append(ans)
		c = c + 1
	return result2

def get_CAcoords(filename,chain_key):
	x =[]
	parser=PDBParser()
	structure=parser.get_structure(filename.split('.')[0], filename)
	for model in structure.get_list():
		chain_A = model[chain_key]
		for residue in chain_A.get_list():
			if residue.has_id("CA"):
				ca=residue["CA"]
				x.append(ca.get_coord())
	x_t  = transpose(x)
	return x_t

###THANK YOU TO MARK BENSON FOR UPDATING THIS HELPER FUNCTION###
def zero_CAcoords(first,list):
        c = 0
        q = len(list)
        r = len(first)
        s = r - q
        result = array([[s]])
        import copy
        temp = copy.copy(first)
        if q != 0:
		result = copy.copy(first)
		for i in range(len(first)) :
			if i not in list :
				if i + c < r :
					temp[i] = first[i+c]
			else:
				c = c + 1

				result[i] = [0,0,0]
				if i + c < r :
					temp[i] = first[i+c]
		a = temp[:s]	
		return a
        else:
              return first

def getAll_Coords(filename):
	result =[]
	parser=PDBParser()
	structure=parser.get_structure(filename.split('.')[0], filename)
	for model in structure.get_list():
		chain = model.get_list()
		for each in chain:
			for res in each.get_list():
				for x in res.get_list():
					result.append(x.get_coord())
	x_t = transpose(result)
	return x_t

def getStructure(filename):
	parser = PDBParser()
	structure = parser.get_structure(filename.split('.')[0], filename)
	return structure

def writeStructure(structure,filename):
	from Bio.PDB.PDBIO import PDBIO
	import sys
	io = PDBIO()
	io.set_structure(structure)
	io.save(filename)	

def setAll(structure,newCACoords):
	allCAs =[]
	for model in structure.get_list():
		chain = model.get_list()
		for each in chain:
			for res in each.get_list():
				for x in res.get_list():
					allCAs.append(x)
	if len(allCAs) != len(newCACoords):
		print "wrong number of atoms .. structure had",len(allCAs),"you gave me",len(newCACoords)
		raise Exception("wrong number of atoms")
	for newCoords,ca in zip(newCACoords,allCAs):
		#print newCoords,ca.get_coord()
		ca.set_coord(newCoords)
		#print ca.get_coord()
		#raise Exception("time to stop")

def mean(first,n):
	return [sum(each)/n for each in first]

def translation(first,second):
	c = 0
	k = []
	q = len(first)
	while c < q:
		for each in first[c]:
			subtr = each - second[c]
			k.append(subtr)
		c = c + 1
	return k

def nested_list(name,n):
	first1 = name[0:n]
	first2 = name[n:2*n]
	first3 = name[2*n:3*n]
	first_translated =[first1,first2,first3]
	return first_translated

def crossproduct(a,b):
	C_0 = a[1]*b[2] - a[2]*b[1]
	C_1 = a[2]*b[0] - a[0]*b[2]
	C_2 = a[0]*b[1] - a[1]*b[0]
	return [C_0, C_1, C_2]
	
def normalize(a):
	B_0 = (a[0])/(((a[0]**2)+(a[1]**2)+((a[2])**2))**(1/2))
	B_1 = (a[1])/(((a[0]**2)+(a[1]**2)+((a[2])**2))**(1/2))
	B_2 = (a[2])/(((a[0]**2)+(a[1]**2)+((a[2])**2))**(1/2))
	return [B_0, B_1, B_2]

def rotation_matrix(first, second):
	U = matrixmultiply(first, second)
	return U

def sqr(matrix):
	k = []
	for each in matrix:
		sq = (each)**2
		k.append(sq)
	return k

def sqroot(matrix):
	j = []
	for each in matrix:
		sqroot = sqrt(each)
		j.append(sqroot)
	return j

def sRMSD(first,second):
	first = array(first)
	second = array(second)
	subtr = first - second
	def sqr(matrix):
		k = []
		for each in matrix:
			sq = (each)**2
			k.append(sq)
		return k
	subtr_s = sqr(subtr)
	sum_subtr_s = sum(subtr_s)
	def sqroot(matrix):
		j = []
		for each in matrix:
			sqroot = sqrt(each)
			j.append(sqroot)
		return j
	d = sqroot(sum_subtr_s)
	sq_d = sqr(d)
	s_sq_d = sum(sq_d)
	tot = (len(first[0]))
	value = sqrt(s_sq_d/tot)
	return value

def add_coords(first,second):
	c = 0
	k = []
	q = len(first)
	while c < q:
		for each in first[c]:
			add = each + second[c]
			k.append(add)
		c = c + 1
	return k

def weight_trans(first,weight,n):
	mult = multiply(first, weight)
	sum_mult = sum(mult)
	mean = [sum(each)/n for each in mult]
	return mean

def weight(first,second,constant):
	first = array(first)
	second = array(second)
	subtr = first - second
	subtr_s = sqr(subtr)
	sum_subtr_s = sum(subtr_s)
	d = sqroot(sum_subtr_s)
	weighted_d = Gaussian(d,constant)
	weighted_d = Gaussian2(weighted_d)
	return weighted_d

def wSUM(first,second,constant,atoms):
	first = array(first)
	second = array(second)
	subtr = first - second
	subtr_s = sqr(subtr)
	sum_subtr_s = sum(subtr_s)
	d = sqroot(sum_subtr_s)
	weighted_d = Gaussian(d,constant)
	weighted_d = Gaussian2(weighted_d)
	sum_weighted_d = sum(weighted_d)
	value = sum_weighted_d/atoms
	return value

def wRMSD(first,second,constant):
	first = array(first)
	second = array(second)
	subtr = first - second
	subtr_s = sqr(subtr)
	sum_subtr_s = sum(subtr_s)
	d = sqroot(sum_subtr_s)
	weighted_d = Gaussian(d,constant)
	weights = Gaussian2(weighted_d)
	sq_d = sqr(d)
	wd = multiply(sq_d,weights)
	s_wd = sum(wd)
	n = len(d)
	s_sq_d_divide = s_wd/n
	value = sqrt(s_sq_d_divide)
	return value

def Gaussian(first,z):
	value = []
	for each in first:
		weight = (-((each)**2)/z)
		value.append(weight)
	return value	

def Gaussian2(first):
	value = []
	for each in first:
		weight = exp(each)
		value.append(weight)
	return value

def Compare_Solns(file,t):	
	list = [t + 1]
	c = t + 1
	while c < 10:
		SRMSD = sRMSD(transpose(file[c]),transpose(file[t]))
		c = c + 1
		if SRMSD < 0.5:
			list.append(c)
	return list

def Unique_Coords(file):	
	t = 0
	all_answer = []
	while t < 9:
		if t == 0:
			answer = Compare_Solns(file,t)
			all_answer.append(answer)
		else:
			for each in all_answer:
				if t + 1 not in each:
					answer = Compare_Solns(file,t)
				else:
					answer = [0]
					break
			all_answer.append(answer)
		t = t + 1
	for each in all_answer:
		if 10 not in each:
			all_answer.append([10])
			break
	return all_answer
	
def round_decimal(file):
	rounded_wSUM = []
	for each in file:
		rounded_wSUM.append(round(each,4))
	return rounded_wSUM

def percent(file):
	catch = []
	for each in file:
		a = each * 100
		catch.append(a)
	return catch

def Coord_Positions(file,coords,s,final_wSUM,title):
	new_list = []
	all_wSUM = []
	ALL_first = []
	all_soln_coords = []
	for each in file:
		if each != [0]:
			new_list.append(each)
	for num in new_list:
		first = num[0]
		ALL_first.append(first)
	for value in ALL_first:
		wSUM = final_wSUM[value -1]
		all_wSUM.append(wSUM)
		soln_coords = (coords[value - 1])
		all_soln_coords.append(soln_coords)
	#Sort coords in order of %wSUM
	a = [(all_wSUM[k],all_soln_coords[k]) for k in range(len(all_soln_coords))]
	a.sort()
	a.reverse()
	all_wSUM = [b[0] for b in a]
	all_soln_coords = [b[1] for b in a]
	count = 1
	#Write out weighted solns
	for sorted_coords in all_soln_coords:
		setAll(s,sorted_coords)
		writeStructure(s,'%s_wRMSD_%s.pdb'%(title[0],count))
		count = count + 1
	return all_wSUM

def print_pos_soln(file1,title):
	n = len(file1)
	c = 0
	while c < n:
		print title[0],"wRMSD",c+1,"corresponds to %wSUM of",file1[c],"%"
		c = c + 1


##### HELPER FUNCTIONS IN ENSURE RESIDUE CORRESPONDENCE FUNCTION #####
##### THIS HAS BEEN MODIFIED TO WORK WITH WRMSD CODE #####
# Copyright (C) 2002, Thomas Hamelryck (thamelry@vub.ac.be)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.  


# Python stuff
import sys
from string import split
from Numeric import array, Float0

# My stuff
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
from Bio.PDB.parse_pdb_header import _parse_pdb_header_list

__doc__="Parser for PDB files."


# If PDB spec says "COLUMNS 18-20" this means line[17:20]


class PDBParser:
    """
    Parse a PDB file and return a Structure object.
    """

    def __init__(self, PERMISSIVE=0, get_header=0, structure_builder=None):
        """
        The PDB parser call a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instanciated by the
        PDBParser object itself, but if the user provides his own StructureBuilder
        object, the latter is used instead.

        Arguments:
        o PERMISSIVE - int, if this is 0 exceptions in constructing the
        SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are 
        caught, but some residues or atoms will be missing. THESE EXCEPTIONS 
        ARE DUE TO PROBLEMS IN THE PDB FILE!.
        o structure_builder - an optional user implemented StructureBuilder class. 
        """
        if structure_builder!=None:
            self.structure_builder=structure_builder
        else:
            self.structure_builder=StructureBuilder()
        self.header=None
        self.trailer=None
        self.line_counter=0
        self.PERMISSIVE=PERMISSIVE

    # Public methods

    def get_structure(self, id, file):
        """Return the structure.

        Arguments:
        o id - string, the id that will be used for the structure
        o file - name of the PDB file OR an open filehandle
        """
        self.header=None
        self.trailer=None
        # Make a StructureBuilder instance (pass id of structure as parameter)
        self.structure_builder.init_structure(id)
        if isinstance(file, basestring):
            file=open(file)
        self._parse(file.readlines())
        file.close()
        self.structure_builder.set_header(self.header)
        # Return the Structure instance
        return self.structure_builder.get_structure()

    def get_header(self):
        "Return the header."
        return self.header

    def get_trailer(self):
        "Return the trailer."
        return self.trailer

    # Private methods
    
    def _parse(self, header_coords_trailer):
        "Parse the PDB file."
        # Extract the header; return the rest of the file
        self.header, coords_trailer=self._get_header(header_coords_trailer)
        # Parse the atomic data; return the PDB file trailer
        self.trailer=self._parse_coordinates(coords_trailer)
    
    def _get_header(self, header_coords_trailer):
        "Get the header of the PDB file, return the rest."
        structure_builder=self.structure_builder
        for i in range(0, len(header_coords_trailer)):
            structure_builder.set_line_counter(i+1)
            line=header_coords_trailer[i]
            record_type=line[0:6] 
            if(record_type=='ATOM  ' or record_type=='HETATM' or record_type=='MODEL '):
                break
        header=header_coords_trailer[0:i]
        # Return the rest of the coords+trailer for further processing
        self.line_counter=i
        coords_trailer=header_coords_trailer[i:]
        header_dict=_parse_pdb_header_list(header)
        return header_dict, coords_trailer
    
    def _parse_coordinates(self, coords_trailer):
        "Parse the atomic data in the PDB file."
        local_line_counter=0
        structure_builder=self.structure_builder
        current_model_id=0
        # Flag we have an open model
        model_open=0
        current_chain_id=None
        current_segid=None
        current_residue_id=None
        current_resname=None
        for i in range(0, len(coords_trailer)):
            line=coords_trailer[i]
            record_type=line[0:6]
            global_line_counter=self.line_counter+local_line_counter+1
            structure_builder.set_line_counter(global_line_counter)
            if(record_type=='ATOM  ' or record_type=='HETATM'):
                # Initialize the Model - there was no explicit MODEL record
                if not model_open:
                    structure_builder.init_model(current_model_id)
                    current_model_id+=1
                    model_open=1
                fullname=line[12:16]
                # get rid of whitespace in atom names
                split_list=split(fullname)
                if len(split_list)!=1:
                    # atom name has internal spaces, e.g. " N B ", so
                    # we do not strip spaces
                    name=fullname
                else:
                    # atom name is like " CA ", so we can strip spaces
                    name=split_list[0]
                altloc=line[16:17]
                resname=line[17:20]
                chainid=line[21:22]
                try:
                    serial_number=int(line[6:11])
                except:
                    serial_number=0
                resseq=int(split(line[22:26])[0])   # sequence identifier   
                icode=line[26:27]           # insertion code
                if record_type=='HETATM':       # hetero atom flag
                    if resname=="HOH" or resname=="WAT":
                        hetero_flag="W"
                    else:
                        hetero_flag="H"
                else:
                    hetero_flag=" "
                residue_id=(hetero_flag, resseq, icode)
                # atomic coordinates
                x=float(line[30:38]) 
                y=float(line[38:46]) 
                z=float(line[46:54])
                coord=array((x, y, z), Float0)
                # occupancy & B factor
                occupancy=float(line[54:60])
                bfactor=float(line[60:66])
                segid=line[72:76]
                if current_segid!=segid:
                    current_segid=segid
                    structure_builder.init_seg(current_segid)
                if current_chain_id!=chainid:
                    current_chain_id=chainid
                    structure_builder.init_chain(current_chain_id)
                    current_residue_id=residue_id
                    current_resname=resname
                    try:
                        structure_builder.init_residue(resname, hetero_flag, resseq, icode)
                    except PDBConstructionException, message:
                        self._handle_PDB_exception(message, global_line_counter)
                elif current_residue_id!=residue_id or current_resname!=resname:
                    current_residue_id=residue_id
                    current_resname=resname
                    try:
                        structure_builder.init_residue(resname, hetero_flag, resseq, icode)
                    except PDBConstructionException, message:
                        self._handle_PDB_exception(message, global_line_counter) 
                # init atom
                try:
                    structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number)
                except PDBConstructionException, message:
                    self._handle_PDB_exception(message, global_line_counter)
            elif(record_type=='ANISOU'):
                anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
                # U's are scaled by 10^4 
                anisou_array=(array(anisou, Float0)/10000.0).astype(Float0)
                structure_builder.set_anisou(anisou_array)
            elif(record_type=='MODEL '):
                structure_builder.init_model(current_model_id)
                current_model_id+=1
                model_open=1
                current_chain_id=None
                current_residue_id=None
            elif(record_type=='END   ' or record_type=='CONECT'):
                # End of atomic data, return the trailer
                self.line_counter=self.line_counter+local_line_counter
                return coords_trailer[local_line_counter:]
            elif(record_type=='ENDMDL'):
                model_open=0
                current_chain_id=None
                current_residue_id=None
            elif(record_type=='SIGUIJ'):
                # standard deviation of anisotropic B factor
                siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
                # U sigma's are scaled by 10^4
                siguij_array=(array(siguij, Float0)/10000.0).astype(Float0)   
                structure_builder.set_siguij(siguij_array)
            elif(record_type=='SIGATM'):
                # standard deviation of atomic positions
                sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
                sigatm_array=array(sigatm, Float0)
                structure_builder.set_sigatm(sigatm_array)
            local_line_counter=local_line_counter+1
        # EOF (does not end in END or CONECT)
        self.line_counter=self.line_counter+local_line_counter
        return []

    def _handle_PDB_exception(self, message, line_counter):
        """
        This method catches an exception that occurs in the StructureBuilder
        object (if PERMISSIVE==1), or raises it again, this time adding the 
        PDB line number to the error message.
        """
        message="%s at line %i." % (message, line_counter)
        if self.PERMISSIVE:
            # just print a warning - some residues/atoms will be missing
            print "PDBConstructionException: %s" % message
            print "Exception ignored.\nSome atoms or residues will be missing in the data structure."
        else:
            # exceptions are fatal - raise again with new message (including line nr)
            raise PDBConstructionException, message

if __name__=="__main__":

	import sys

	p=PDBParser(PERMISSIVE=1)

	s=p.get_structure("scr", sys.argv[1])

	for m in s.get_iterator():
		p=m.get_parent()
		assert(p is s)
		for c in m.get_iterator():
			p=c.get_parent()
			assert(p is m)
			for r in c.get_iterator():
				p=r.get_parent()
				assert(p is c)
				for a in r.get_iterator():
					p=a.get_parent()
					if not p is r:
						print p, r


#RUN Local_wRMSD.py 
if __name__ == "__main__":
	if len(sys.argv) != 5:
    		print "usage: Local_wRMSD.py Protein_X.pdb Protein_X_ChainID Protein_Y.pdb Protein_Y_ChainID" 
		sys.exit()
    	filename1 = sys.argv[1]
   	filename2 = sys.argv[3]
	X_Chain_ID = sys.argv[2]
	Y_Chain_ID = sys.argv[4]
	run_Local_wRMSD(filename1,filename2,X_Chain_ID,Y_Chain_ID)