#!/usr/bin/python3
import uuid
import itertools
import warnings
import numpy as np
import dpdata
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components
from scipy.spatial import cKDTree
try:
from openbabel import openbabel
except ImportError:
try:
import openbabel
except ImportError:
pass
try:
from ase import Atoms, Atom
from ase.data import atomic_numbers
except ImportError:
pass
def _crd2frag(symbols, crds, pbc=False, cell=None, return_bonds=False):
atomnumber = len(symbols)
all_atoms = Atoms(symbols = symbols, positions = crds, pbc=pbc, cell=cell)
if pbc:
repeated_atoms = all_atoms.repeat(2)[atomnumber:]
tree = cKDTree(crds)
d = tree.query(repeated_atoms.get_positions(), k=1)[0]
nearest = d < 5
ghost_atoms = repeated_atoms[nearest]
realnumber = np.where(nearest)[0] % atomnumber
all_atoms += ghost_atoms
# Use openbabel to connect atoms
mol = openbabel.OBMol()
mol.BeginModify()
for idx, (num, position) in enumerate(zip(all_atoms.get_atomic_numbers(), all_atoms.positions)):
atom = mol.NewAtom(idx)
atom.SetAtomicNum(int(num))
atom.SetVector(*position)
mol.ConnectTheDots()
mol.PerceiveBondOrders()
mol.EndModify()
bonds = []
for ii in range(mol.NumBonds()):
bond = mol.GetBond(ii)
a = bond.GetBeginAtom().GetId()
b = bond.GetEndAtom().GetId()
bo = bond.GetBondOrder()
if a >= atomnumber and b >= atomnumber:
# duplicated
continue
elif a >= atomnumber:
a = realnumber[a-atomnumber]
elif b >= atomnumber:
b = realnumber[b-atomnumber]
bonds.extend([[a, b, bo], [b, a, bo]])
bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
graph = csr_matrix(
(bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber))
frag_numb, frag_index = connected_components(graph, 0)
if return_bonds:
return frag_numb, frag_index, graph
return frag_numb, frag_index
def _crd2mul(symbols, crds):
atomnumber = len(symbols)
xyzstring = ''.join((f"{atomnumber}\nDPGEN\n", "\n".join(
['{:2s} {:22.15f} {:22.15f} {:22.15f}'.format(s, x, y, z)
for s, (x, y, z) in zip(symbols, crds)])))
conv = openbabel.OBConversion()
conv.SetInAndOutFormats('xyz', 'gjf')
mol = openbabel.OBMol()
conv.ReadString(mol, xyzstring)
gjfstring = conv.WriteString(mol)
try:
mul = int(gjfstring.split('\n')[4].split()[1])
except IndexError:
# openbabel 3.0
mul = int(gjfstring.split('\n')[5].split()[1])
return mul
[docs]def detect_multiplicity(symbols):
# currently only support charge=0
# oxygen -> 3
if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2:
return 3
# calculates the total number of electrons, assumes they are paired as much as possible
n_total = sum([atomic_numbers[s] for s in symbols])
return n_total % 2 + 1
[docs]def take_cluster(old_conf_name, type_map, idx, jdata):
cutoff = jdata['cluster_cutoff']
cutoff_hard = jdata.get('cluster_cutoff_hard', None)
sys = dpdata.System(old_conf_name, fmt = 'lammps/dump', type_map = type_map)
atom_names = sys['atom_names']
atom_types = sys['atom_types']
cell = sys['cells'][0]
coords = sys['coords'][0]
symbols = [atom_names[atom_type] for atom_type in atom_types]
# detect fragment
frag_numb, frag_index, graph = _crd2frag(symbols, coords, True, cell, return_bonds=True)
# get_distances
all_atoms = Atoms(symbols = symbols, positions = coords, pbc=True, cell=cell)
all_atoms[idx].tag = 1
distances = all_atoms.get_distances(idx, range(len(all_atoms)), mic=True)
distancescutoff = distances < cutoff
cutoff_atoms_idx = np.where(distancescutoff)[0]
if cutoff_hard is not None:
distancescutoff_hard = distances < cutoff_hard
cutoff_atoms_idx_hard = np.where(distancescutoff_hard)[0]
# make cutoff atoms in molecules
taken_atoms_idx = []
added = []
for ii in range(frag_numb):
frag_atoms_idx = np.where(frag_index == ii)[0]
if cutoff_hard is not None:
# drop atoms out of the hard cutoff anyway
frag_atoms_idx = np.intersect1d(frag_atoms_idx, cutoff_atoms_idx_hard)
if np.any(np.isin(frag_atoms_idx, cutoff_atoms_idx)):
if 'cluster_minify' in jdata and jdata['cluster_minify']:
# support for organic species
take_frag_idx=[]
for aa in frag_atoms_idx:
if np.any(np.isin(aa, cutoff_atoms_idx)):
# atom is in the soft cutoff
# pick up anyway
take_frag_idx.append(aa)
elif np.count_nonzero(np.logical_and(distancescutoff, graph.toarray()[aa]==1)):
# atom is between the hard cutoff and the soft cutoff
# and has a single bond with the atom inside
if all_atoms[aa].symbol == 'H':
# for atom H: just add it
take_frag_idx.append(aa)
else:
# for other atoms (C, O, etc.): replace it with a ghost H atom
near_atom_idx = np.nonzero(np.logical_and(distancescutoff, graph.toarray()[aa]>0))[0][0]
vector = all_atoms[aa].position - all_atoms[near_atom_idx].position
new_position = all_atoms[near_atom_idx].position + vector / np.linalg.norm(vector) * 1.09
added.append(Atom('H', new_position))
elif np.count_nonzero(np.logical_and(distancescutoff, graph.toarray()[aa]>1)):
# if that atom has a double bond with the atom inside
# just pick up the whole fragment (within the hard cutoff)
# TODO: use a more fantastic method
take_frag_idx=frag_atoms_idx
break
else:
take_frag_idx = frag_atoms_idx
taken_atoms_idx.append(take_frag_idx)
all_taken_atoms_idx = np.concatenate(taken_atoms_idx)
# wrap
cutoff_atoms = sum(added, all_atoms[all_taken_atoms_idx])
cutoff_atoms.wrap(
center=coords[idx] /
cutoff_atoms.get_cell_lengths_and_angles()[0: 3],
pbc=True)
coords = cutoff_atoms.get_positions()
sys.data['coords'] = np.array([coords])
sys.data['atom_types'] = np.array(list(atom_types[all_taken_atoms_idx]) + [atom_names.index('H')]*len(added))
sys.data['atom_pref'] = np.array([cutoff_atoms.get_tags()])
for ii, _ in enumerate(atom_names):
sys.data['atom_numbs'][ii] = np.count_nonzero(sys.data['atom_types']==ii)
return sys