16 April 2014

572. autorotate/superimpose python script

If you want to calculate reaction coordinates between two structures you need to make sure that the structures haven't been rotated or translated, something which easily happens if you allow symmetry in gaussian and (it seems) z-matrix in nwchem.

I've written a script that lets you take two structures and align and superimpose them so that only the atoms that take part in the reaction move.

It works by you defining a minimum of four atoms that aren't supposed to move /relative to each other/ (i.e. they can be translated/rotated --- just not relative to each other) between the two structures. Four atoms far from each other are ideal. You need to make sure that they also don't lie in the same plane, but form a three-dimensional space.

I've tried this on real molecules too and it works better than I'd ever dared to hope for. The more 'stationary' atoms that you can use to make up the transformation matrix, the better.


Example:
In this example atom F is in a different location in structures a and b. The structures have also been rotated relative to each other.

a.xyz
6 A A 0.00000 0.00000 1.00000 B 0.00000 1.00000 0.00000 C 1.00000 0.00000 0.00000 D -1.00000 0.00000 0.00000 E 0.00000 0.00000 -1.00000 F 0.00000 0.500000 0.00000
b.xyz
6 B A 1 0 0 B 0 1 0 C 0 0 -1 D 0 0 1 E -1 0 0 F 0 -1 0

./autorotate.py a.xyz b.xyz '1,2,3,4'
Selected atoms in molecules 1 and 2 ['A', 0.0, 0.0, 1.0] ['A', 1.0, 0.0, 0.0] ['B', 0.0, 1.0, 0.0] ['B', 0.0, 1.0, 0.0] ['C', 1.0, 0.0, 0.0] ['C', 0.0, 0.0, -1.0] ['D', -1.0, 0.0, 0.0] ['D', 0.0, 0.0, 1.0] Transformation max error: 3.33066907388e-16 Writing to a.rot.xyz

a.rot.xyz
6 A A 1.00000 0.00000 0.00000 B -0.00000 1.00000 -0.00000 C -0.00000 0.00000 -1.00000 D -0.00000 0.00000 1.00000 E -1.00000 -0.00000 -0.00000 F -0.00000 0.50000 -0.00000
This is how it looks (note that the axis aren't aligned with (1,0,0; 0,1,0; 0,0,1) but seem to go through the centre of the molecule):
a.xyz
a.rot.xyz


b.xyz





Code:
#!/usr/bin/python
import sys
import numpy as np

#autorotate input_1.xyz input_2.xyz '1,2,3,4'
# need to pick at least four atoms that are not in the same plane
# input_1.xyz will be rotated to align with input_2.xyz
# you pick at least four atoms that should have the same positions
# relative to one another (i.e. distance and relative geometry). These 
# are then used to calculate an affine transform matrix which is used 
# to rotate and translate structure input_1.xyz to overlap with 
# structure 2

def formatinput(argument):
 infile1=sys.argv[1]
 atoms=sys.argv[3]
 atoms=atoms.split(',')
 coord_sys=[]

 for n in atoms:
  coord_sys+=[int(n)-1]
 try:
  infile2=sys.argv[2]
 except:
  infile2=''
 infile=[infile1,infile2]
 return infile,coord_sys
 
def getrawdata(infile):
 f=open(infile,'r')
 
 n=0
 preamble=[]
 
 struct=[]
 
 for line in f:
  if n<2: data-blogger-escaped-if="" data-blogger-escaped-line.rstrip="" data-blogger-escaped-n="" data-blogger-escaped-preamble="">1:
   line=line.rstrip()
   struct+=[line]
  n+=1
 xyz=[struct]
 
 return xyz, preamble

def getcoords(rawdata,preamble,atoms):
 
 n=0
 cartesian=[]
 
 for structure in rawdata:
  n=n+1
  num="%03d" % (n,)
  for item in structure:
   
   coordx=filter(None,item.split(' '))
   coordy=filter(None,item.split('\t'))
   if len(coordx)>len(coordy):
    coords=coordx
   else:
    coords=coordy
      
   coordinates=[float(coords[1]),float(coords[2]),float(coords[3])]
   element=coords[0]
   cartesian+=[[element,float(coords[1]),float(coords[2]),float(coords[3])]]
     
 return cartesian

def getstructures(rawdata,preamble):
 
 n=0
 cartesian=[]
 
 for structure in rawdata:
  n=n+1
  num="%03d" % (n,)
  for item in structure:
   
   coordx=filter(None,item.split(' '))
   coordy=filter(None,item.split('\t'))
   if len(coordx)>len(coordy):
    coords=coordx
   else:
    coords=coordy
      
   coordinates=[float(coords[1]),float(coords[2]),float(coords[3])]
   element=coords[0]
   cartesian+=[coordinates]
     
 return cartesian

def affine_transform(atoms,structures):
# from http://stackoverflow.com/questions/20546182/how-to-perform-coordinates-affine-transformation-using-python-part-2
 primaryatomcoords=[]
 for n in atoms:
  primaryatomcoords+=[structures[0][n]]

 secondaryatomcoords=[]
 for n in atoms:
  secondaryatomcoords+=[structures[1][n]]

 primary = np.array(primaryatomcoords)
 secondary = np.array(secondaryatomcoords)
 primaryfull = np.array(structures[0])

 # Pad the data with ones, so that our transformation can do translations too
 n = primary.shape[0]
 pad = lambda x: np.hstack([x, np.ones((x.shape[0], 1))])
 unpad = lambda x: x[:,:-1]
 X = pad(primary)
 Y = pad(secondary)
 Xp= pad(primaryfull)

 # Solve the least squares problem X * A = Y
 # to find our transformation matrix A
 A, res, rank, s = np.linalg.lstsq(X, Y)

 transform = lambda x: unpad(np.dot(pad(x), A))

# print "Max error should be as small as possible if the rotation is successful"
# print "If max error is large you may have selected a bad set of atoms"
 print "Transformation max error:", np.abs(secondary - transform(primary)).max()
 secondaryfull=transform(primaryfull)
 return secondaryfull

def transform_xyz(tmatrix,newxyz):
 final_xyz=[]
 for n in newxyz:
  coord=np.mat(str(n[0])+';'+str(n[1])+';'+str(n[2]))
  newcoord=np.dot(tmatrix,coord)
  newcoord=np.matrix.tolist(newcoord)
  final_xyz+=[[ newcoord[0][0],newcoord[1][0],newcoord[2][0]]]
 return final_xyz

def genxyzstring(coords,elementnumber):
 x_str='%10.5f'% coords[0]
 y_str='%10.5f'% coords[1]
 z_str='%10.5f'% coords[2]
 element=elementnumber
 xyz_string=element+(3-len(element))*' '+10*' '+\
 (8-len(x_str))*' '+x_str+10*' '+(8-len(y_str))*' '+y_str+10*' '+(8-len(z_str))*' '+z_str+'\n'
 
 return xyz_string

def write_aligned(aligned_structure,atom_coords,preamble,outfile):
 outfile=outfile.replace('.xyz','.rot.xyz')
 print "Writing to ",outfile
 g=open(outfile,'w')
 g.write(str(preamble[0])+'\n'+str(preamble[1])+'\n')
 
 for n in range(0,len(aligned_structure)):
  xyzstring=genxyzstring(aligned_structure[n],atom_coords[n][0])
  g.write(xyzstring)
 g.close()
 return 0
 
if __name__ == "__main__":
 infile,atoms=formatinput(sys.argv)
 
 xyz=['','']
 preamble=['','']
 
 #get raw data
 xyz[0],preamble[0]=getrawdata(infile[0])
 xyz[1],preamble[1]=getrawdata(infile[1])

 atom_coords=[getcoords(xyz[0],preamble[0],atoms)]
 atom_coords+=[getcoords(xyz[1],preamble[1],atoms)]
 
 #collect structures from raw data
 structures=[getstructures(xyz[0],preamble[0])]
 structures+=[getstructures(xyz[1],preamble[1])]
 
 print "Selected atoms in molecules 1 and 2"
 for n in atoms:
  print atom_coords[0][n],atom_coords[1][n]
  
 #transform structure
 aligned_structure=affine_transform(atoms,structures)
 
 write_aligned(aligned_structure,atom_coords[0],preamble[0],str(infile[0]))
 

No comments:

Post a Comment