diff --git a/vermouth/processors/__init__.py b/vermouth/processors/__init__.py index 9445c62f4..d96041ff7 100644 --- a/vermouth/processors/__init__.py +++ b/vermouth/processors/__init__.py @@ -44,3 +44,4 @@ from .sort_molecule_atoms import SortMoleculeAtoms from .merge_all_molecules import MergeAllMolecules from .annotate_mut_mod import AnnotateMutMod +from .generate_coordinates import GenerateCoordinates diff --git a/vermouth/processors/generate_coordinates.py b/vermouth/processors/generate_coordinates.py new file mode 100755 index 000000000..1940d2415 --- /dev/null +++ b/vermouth/processors/generate_coordinates.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2018 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Provides a processor for building missing coordinates. +""" + +import numpy as np +import networkx as nx + +from .processor import Processor +from .. import selectors + + +def mindist(A, B): + ''' + Determine the smallest distance between two point sets (np.ndarray) A and B + ''' + return ((A[:, None] - B[None])**2).sum(axis=2).min(axis=1) + + +def get_atoms_missing_coords(molecule): + ''' + Determine particles without coordinates in molecule. + ''' + return [ + ix for ix in molecule + if not selectors.selector_has_position(molecule.nodes[ix]) + ] + + +def get_anchors(molecule, indices): + ''' + Determine particles with known coordinates connected to + particles with given indices. + ''' + return { + a for ix in indices for a in molecule[ix].keys() + if selectors.selector_has_position(molecule.nodes[a]) + } + + +def align_z(v): + ''' + Generate rotation matrix aligning z-axis onto vector v (and vice-versa). + ''' + # This routine is based on the notion that for any two + # normalized vectors, the alignment is a 180 degree + # rotation around the resultant vector. + + w = v / (v ** 2).sum() ** 0.5 + + if w[2] <= 1e-8 - 1: + return -np.eye(3) # Mind the inversion ... + + w[2] += 1 + + return (2 / (w**2).sum()) * w * w[:, None] - np.eye(3) + + +def conicring(points=24, distance=0.125, angle=109.4, axis=None): + ''' + Create a ring of positions corresponding to a cone with given angle. + An angle of 109.4 is suitable for ideal SP3 substituents. + ''' + p = 2 * np.pi * np.arange(points) / points + r = distance * np.sin(angle * np.pi / 360) + z = distance * np.cos(angle * np.pi / 360) + X = np.stack([r * np.cos(p), r * np.sin(p), z * np.ones(points)], axis=1) + if axis is None: + return X + return X @ align_z(axis) + + +def double_cone(points=24, distance=0.125, angle=109.4, axis=None): + '''Create positions for two consecutive (SP3) bonded particles''' + P = conicring(points, distance, angle, axis) + S = np.array([P @ align_z(x) + x for x in P]) + return P, S + + +def triple_cone(points=24, distance=0.125, angle=109.4): + '''Create possible positions for three consecutive (SP3) bonded particles''' + P, S = double_cone(points, distance, angle) + T = np.array([[[P @ align_z(u - x) + u] for u in U] for x, U in zip(P, S)]) + return P, S, T + + +def _out(molecule, anchor, base, distance=0.125): + ''' + Generate coordinates for atom bonded to anchor + using resultant vector of known substituents. + + This approach works for placement of single unknown + substituents on sp1, sp2 or sp3 atoms. It should also + work for bipyramidal and other configurations. + + b1 + \ + b2--a-->X + / + b3 + + X := position to determine + a := position of anchoring particle + B = [b1, b2, ...] := positions of base particles + + Parameters + ---------- + molecule: vermouth.molecule.Molecule + A molecule with missing atoms + anchor: integer + Index to anchor node: a node with positions connected to a node without + base: list of integers + Indices to particles with positions connected to anchor + distance: float + Distance of target position from anchor + + Returns + ------- + position + ''' + a = molecule.nodes[anchor]['position'] + B = [ molecule.nodes[ix]['position'] for ix in base ] + B = B - a + u = -B.sum(axis=0) + u *= distance / ((u ** 2).sum() ** 0.5) + return a + u + + +def _chiral(molecule, anchor, base, distance=(0.125, 0.125)): + ''' + Generate coordinates for atoms bonded to chiral center + based on two known substituents. + + This approach uses the positions of two substituents to + generate coordinates for one or two atoms connected to + a chiral center. The two atoms are indicated as `up` and + `down`, referring to the direction with respect to the + cross-product of the two substituents. If these control + atoms are not given explicitly in order, then the control + atoms are inferred from the connections of the anchor with + known coordinates. In that case, the specific chirality + of the center (R or S) cannot be controlled. + + NOTE: This function should probably be able to determine + the weights of the substituents to allow placement + based on R or S chirality. + + Parameters + ---------- + molecule: vermouth.molecule.Molecule + A molecule with missing atoms + anchor: integer + Index to anchor node: a node with positions connected to a node without + base: list of integers + Indices to particles with positions connected to anchor + distance: tuple of two floats + Distances of target positions from anchor + + Returns + ------- + tuple of positions (up, down) + ''' + a = molecule.nodes[anchor]['position'] + B = [ molecule.nodes[ix]['position'] for ix in base ] + B = B - a + # Negative resultant vector + u = -0.5 * (B[0] + B[1]) + v = np.cross(B[0], B[1]) + # Set length of v to length to half distance between subs + v = 0.5 * ((B[0] - B[1])**2).sum()**0.5 * v / (v ** 2).sum()**0.5 + uv = u + v + # Normalization of vector sum + n = 1 / (uv**2).sum()**0.5 + + rup, rdown = distance + + pup = a + rup * n * uv + pdown = a + rdown * n * (u - v) + + return pup, pdown + + +class Segment: + '''Class for subgraphs of (missing) atoms with anchors''' + def __init__(self, molecule, limbs): + self.molecule = molecule + self.anchors = limbs[0][0] + self.bases = limbs[0][1] + self.limbs = [ s[2] for s in limbs ] + + def __str__(self): + return ':'.join([str(self.anchors), str(self.bases), str(self.limbs)]) + + def extrude(self, distance=0.125, coords=None, contact=0.5): + ''' + Generate positions for particles without, connected to anchors. + ''' + mol = self.molecule + + for i, (a, b) in enumerate(zip(self.anchors, self.bases)): + target = [ k for k in mol.neighbors(a) if k not in b ] + valence = len(target) + len(b) + + if valence > 2 and len(b) == valence - 1: + # Trivial chiral/flat/bipyramidal/octahedral/... + mol.nodes[target[0]]['position'] = _out(mol, anchor=a, base=b) + for k in self.limbs: + k.discard(target[0]) + # Simply update this segment and return it + self.bases[i] = [a] + self.anchors[i] = target[0] + return [self] + + if valence == 4 and len(b) == 2: + # Trivial chiral - except for the actual chirality + # a simple 'up' or 'down' flag would be helpful + # amino acid side chain is 'up' + up, down = _chiral(mol, anchor=a, base=b) + mol.nodes[target[0]]['position'] = up + mol.nodes[target[1]]['position'] = down + # Simply update this segment and return it + for k in self.limbs: + k.discard(target[0]) + self.bases[i] = [a] + self.anchors[i] = target[0] + return [self] + + if valence == 1: + # Generate a sphere of points + # Check for overlaps + # Take a pick + # Should be done last anyway + # Best collected and stored + continue + + # So a valence of 2 (missing 1), 3 (missing 2) or 4 (missing 3) + # The cases 2(1), 3(2), 4(3) can be solved by sampling possible + # candidate points from a conical set. + + if len(b) == 1 and all(len(s) < 100 for s in self.limbs): + ax = mol.nodes[a]['position'] + bx = mol.nodes[b[0]]['position'] + P = conicring(axis=ax-bx) + ax + if coords is not None: + P[mindist(P, coords) < contact**2] = np.nan + P = P.reshape((len(self.limbs), -1, 3)) + for px in P.transpose((1,0,2)): + if px.max() == np.nan: + continue + # Split the limbs to new segments and return those + #out = [] + for t, pi in zip(target, px): + self.molecule.nodes[t]['position'] = pi + # for k in self.limbs: + # if len(k) > 1 and t in k: + # k.discard(t) + # for g in nx.connected_components(self.molecule.subgraph(k)): + # out.append( + # Segment( + # self.molecule, + # [(t, [[a]], g)], + # self.coords, + # self.contact + # ) + # ) + # This commented-out stuff may be useful for speeding up + # coordinate generation, because it avoids needing lookup + # of missing coordinates in subsequent cycles. + #return out + return + break + return None + + +class GenerateCoordinates(Processor): + """ + A processor for generating coordinates for atoms without. + """ + def __init__(self, contact=0.5): + super().__init__() + self.contact = contact + self.coordinates = None + + def run_system(self, system): + """ + Process `system`. + + Parameters + ---------- + system: vermouth.system.System + The system to process. Is modified in-place. + """ + + self.coordinates = np.stack( + [ + a.get('position', (np.nan, np.nan, np.nan)) + for m in system.molecules + for i, a in m.atoms + ] + ) + + for molecule in system.molecules: + self.run_molecule(molecule) + + def run_molecule(self, molecule): + """ + Process a single molecule. Must be implemented by subclasses. + + Parameters + ---------- + molecule: vermouth.molecule.Molecule + The molecule to process. + + Returns + ------- + vermouth.molecule.Molecule + The provided molecule with complete coordinates + """ + + # Missing atoms - those without 'positions' + missing = get_atoms_missing_coords(molecule) + + prev = len(molecule) + while prev - len(missing) > 0: + # Limbs: subgraphs of missing atoms with anchors and bases + limbs = [] + for g in nx.connected_components(molecule.subgraph(missing)): + anchors = list(get_anchors(molecule, g)) + bases = [ list(get_anchors(molecule, [a])) for a in anchors ] + limbs.append((anchors, bases, g)) + limbs.sort() + + # Segment: subgraphs of limbs sharing anchors and bases + segments = [] + prev = None + for lim in limbs: + if lim[0] != prev: + segments.append([]) + prev = lim[0] + segments[-1].append(lim) + segments = [ Segment(molecule, limbs) for limbs in segments ] + + # Extrude segments one layer at a time + while segments: + s = segments.pop(0) + r = s.extrude(coords=self.coordinates, contact=self.contact) + if r is not None: + segments.extend(r) + + prev = len(missing) + missing = get_atoms_missing_coords(molecule) + #print(len(missing)) + + return molecule