diff --git a/bin/martinize2 b/bin/martinize2 index b4d22c7d3..be254b25d 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -174,8 +174,8 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde mappings=mappings, to_ff=to_ff, delete_unknown=delete_unknown, - attribute_keep=("cgsecstruct", "chain"), - attribute_must=("resname",), + attribute_keep=("cgsecstruct", "resname", "chain"), + attribute_must=("resname", "atype"), attribute_stash=("resid",), ).run_system(system) LOGGER.info("Averaging the coordinates.", type="step") @@ -976,6 +976,9 @@ def entry(): idrs=args.idr_tune, disordered_regions=args.water_idrs ) + if 'bondedtypes' in known_force_fields[args.to_ff].variables: + LOGGER.info("Generating implicit interactions for RTP force field", type='step') + vermouth.RTPPolisher().run_system(system) # Apply position restraints if required. if args.posres != "none": diff --git a/vermouth/forcefield.py b/vermouth/forcefield.py index 55e307129..1ab88e2c7 100644 --- a/vermouth/forcefield.py +++ b/vermouth/forcefield.py @@ -68,8 +68,8 @@ def __init__(self, directory=None, name=None): self.name = None self.citations = {} if directory is not None: - self.read_from(directory) self.name = os.path.basename(str(directory)) + self.read_from(directory) if name is not None: self.name = name if self.name is None: diff --git a/vermouth/gmx/itp.py b/vermouth/gmx/itp.py index 821232c31..ffe4d2f27 100644 --- a/vermouth/gmx/itp.py +++ b/vermouth/gmx/itp.py @@ -18,6 +18,10 @@ import copy import itertools +from collections import defaultdict +from ..log_helpers import StyleAdapter, get_logger + +LOGGER = StyleAdapter(get_logger(__name__)) __all__ = ['write_molecule_itp', ] @@ -55,6 +59,21 @@ def _interaction_sorting_key(interaction): return (conditional, group) +def _sort_atoms(atoms, name): + if name[:4] in ('bond', 'pair'): + return sorted(atoms) + elif name.startswith('angle'): + return atoms if atoms[0] < atoms[-1] else list(reversed(atoms)) + elif name.startswith('dihedral'): + return atoms if atoms[1] < atoms[2] else list(reversed(atoms)) + return atoms + +def _sort_interaction(atoms, name): + if name.startswith('angle'): + return (atoms[1], tuple(atoms)) + else: return (min(atoms), tuple(atoms)) + + def write_molecule_itp(molecule, outfile, header=(), moltype=None, post_section_lines=None, pre_section_lines=None): """ @@ -111,10 +130,16 @@ def write_molecule_itp(molecule, outfile, header=(), moltype=None, # Make sure the molecule contains the information required to write the # [atoms] section. The charge and mass can be ommited, if so gromacs take # them from the [atomtypes] section of the ITP file. + atoms_with_issues = defaultdict(list) for attribute in ('atype', 'resid', 'resname', 'atomname', 'charge_group'): - if not all([attribute in atom for _, atom in molecule.atoms]): - raise ValueError('Not all atom have a {}.'.format(attribute)) + for _, atom in molecule.atoms: + if attribute not in atom: + atoms_with_issues[attribute].append(atom) + if atoms_with_issues: + for attr, atoms in atoms_with_issues.items(): + LOGGER.error('The following atoms do not have a {}: {}', attr, atoms) + raise ValueError("Some atoms are missing required attributes.") # Get the maximum length of each atom field so we can align the fields. # Atom indexes are written as a consecutive series starting from 1. @@ -193,10 +218,12 @@ def write_molecule_itp(molecule, outfile, header=(), moltype=None, # object to distinguish them from the proper dihedrals. Yet, they # should be written under the [ dihedrals ] section of the ITP file. if name == 'impropers': - name = 'dihedrals' - outfile.write('[ {} ]\n'.format(name)) + section_name = 'dihedrals' + else: + section_name = name + outfile.write('[ {} ]\n'.format(section_name)) seen_sections.add(name) - for line in pre_section_lines.get(name, []): + for line in pre_section_lines.get(section_name, []): outfile.write(line + '\n') interactions_group_sorted = sorted( interactions, @@ -212,11 +239,12 @@ def write_molecule_itp(molecule, outfile, header=(), moltype=None, outfile.write('{} {}\n'.format(conditional_key, conditional[0])) if group: outfile.write('; {}\n'.format(group)) + interactions_in_group = sorted(interactions_in_group, key=lambda i: _sort_interaction(_sort_atoms(list(map(correspondence.get, i.atoms)), name), name)) for interaction in interactions_in_group: atoms = ['{atom_idx:>{max_length[idx]}}' - .format(atom_idx=correspondence[x], + .format(atom_idx=x, max_length=max_length) - for x in interaction.atoms] + for x in _sort_atoms(list(map(correspondence.get, interaction.atoms)), name)] parameters = ' '.join(str(x) for x in interaction.parameters) comment = '' if 'comment' in interaction.meta: diff --git a/vermouth/gmx/rtp.py b/vermouth/gmx/rtp.py index b4d4e92ed..5d3bc0f4b 100644 --- a/vermouth/gmx/rtp.py +++ b/vermouth/gmx/rtp.py @@ -17,11 +17,13 @@ """ import collections -import itertools +import copy import networkx as nx from ..molecule import Block, Link, Interaction -from .. import utils + +from ..log_helpers import StyleAdapter, get_logger +LOGGER = StyleAdapter(get_logger(__name__)) __all__ = ['read_rtp'] @@ -192,6 +194,7 @@ def wrapped(subsection, block): interactions.append(Interaction(atoms=atoms, parameters=parameters, meta={})) + block.add_nodes_from(atoms) block.interactions[interaction_name] = interactions return wrapped @@ -200,6 +203,7 @@ def _parse_bondedtypes(section): # Default taken from # 'src/gromacs/gmxpreprocess/resall.cpp::read_resall' in the Gromacs # source code. + defaults = _BondedTypes(bonds=1, angles=1, dihedrals=1, impropers=1, all_dihedrals=0, nrexcl=3, HH14=1, remove_dih=1) @@ -233,18 +237,6 @@ def _parse_bondedtypes(section): return bondedtypes -def _count_hydrogens(names): - return len([name for name in names if utils.first_alpha(name) == 'H']) - - -def _keep_dihedral(center, block, bondedtypes): - if (not bondedtypes.all_dihedrals) and block.has_dihedral_around(center): - return False - if bondedtypes.remove_dih and block.has_improper_around(center): - return False - return True - - def _complete_block(block, bondedtypes): """ Add information from the bondedtypes section to a block. @@ -254,28 +246,6 @@ def _complete_block(block, bondedtypes): """ block.make_edges_from_interactions() - # Generate missing dihedrals - # As pdb2gmx generates all the possible dihedral angles by default, - # RTP files are written assuming they will be generated. A RTP file - # have some control over these dihedral angles through the bondedtypes - # section. - all_dihedrals = [] - for center, dihedrals in itertools.groupby( - sorted(block.guess_dihedrals(), key=_dihedral_sorted_center), - _dihedral_sorted_center): - if _keep_dihedral(center, block, bondedtypes): - # TODO: Also sort the dihedrals by index. - # See src/gromacs/gmxpreprocess/gen_add.cpp::dcomp in the - # Gromacs source code (see version 2016.3 for instance). - atoms = sorted(dihedrals, key=_count_hydrogens)[0] - all_dihedrals.append(Interaction(atoms=atoms, parameters=[], meta={})) - # TODO: Sort the dihedrals by index - block.interactions['dihedrals'] = ( - block.interactions.get('dihedrals', []) + all_dihedrals - ) - - # TODO: generate 1-4 interactions between pairs of hydrogen atoms - # Add function types to the interaction parameters. This is done as a # post processing step to cluster as much interaction specific code # into this method. @@ -292,9 +262,9 @@ def _complete_block(block, bondedtypes): 'exclusions': 1, 'cmap': 1, } - for name, interactions in block.interactions.items(): - for interaction in interactions: - interaction.parameters.insert(0, functypes[name]) + for functype in functypes: + for interaction in block.interactions[functype]: + interaction.parameters.insert(0, functypes[functype]) # Set the nrexcl to the block. block.nrexcl = bondedtypes.nrexcl @@ -325,12 +295,15 @@ def _split_blocks_and_links(pre_blocks): Split an individual pre-block into a block and a link. """ blocks = {} - links = [] + all_links = [] for name, pre_block in pre_blocks.items(): - block, link = _split_block_and_link(pre_block) + block, links = _split_block_and_link(pre_block) blocks[name] = block - links.append(link) - return blocks, links + if links: + for link in links: + if link: + all_links.append(link) + return blocks, all_links def _split_block_and_link(pre_block): @@ -351,7 +324,7 @@ def _split_block_and_link(pre_block): ------- block: Block All the intra-residue information. - link: Link + links: List[Link] All the inter-residues information. """ block = Block(force_field=pre_block.force_field) @@ -370,44 +343,29 @@ def _split_block_and_link(pre_block): except AttributeError: pass - # Filter the particles from neighboring residues out of the block. - for atom in pre_block.atoms: - if not atom['atomname'].startswith('+-'): - atom['resname'] = pre_block.name - block.add_atom(atom) - link.add_node(atom['atomname']) - - # Create the edges of the link and block based on the edges in the pre-block. - # This will create too many edges in the link, but the useless ones will be - # pruned latter. - link.add_edges_from(pre_block.edges) - block.add_edges_from(edge for edge in pre_block.edges - if not any(node[0] in '+-' for node in edge)) - - # Split the interactions from the pre-block between the block (for - # intra-residue interactions) and the link (for inter-residues ones). - # The "relevant_atoms" set keeps track of what particles are - # involved in the link. This will allow to prune the link without - # iterating again through its interactions. - relevant_atoms = set() - for name, interactions in pre_block.interactions.items(): + links = [] + + for interaction_type, interactions in pre_block.interactions.items(): for interaction in interactions: - for_link = any(atom[0] in '+-' for atom in interaction.atoms) - if for_link: - link.interactions[name].append(interaction) - relevant_atoms.update(interaction.atoms) - else: - block.interactions[name].append(interaction) - - # Prune the link to keep only the edges and particles that are - # relevant. - nodes = set(link.nodes()) - link.remove_nodes_from(nodes - relevant_atoms) - # Some interactions do not generate nodes (impropers for instance). If a - # node is only described in one of such interactions, then the node never - # appears in the link. Here we make sure these nodes exists even if they - # are note connected. - link.add_nodes_from(relevant_atoms) + # Atoms that are not defined in the atoms section but only in interactions + # (because they're part of the neighbours), don't have an atomname + atomnames = [pre_block.nodes[n_idx].get('atomname') or n_idx for n_idx in interaction.atoms] + if any(atomname[0] in '+-' for atomname in atomnames): + link = Link(pre_block.subgraph(interaction.atoms)) + if not nx.is_connected(link): + LOGGER.info('Discarding {} between atoms {} for link' + ' defined in residue {} in force field {}' + ' because it is disconnected.', + interaction_type, ' '.join(interaction.atoms), + pre_block.name, pre_block.force_field.name, + type='inconsistent-data') + continue + links.append(link) + + block = pre_block + block.remove_nodes_from([n for n in pre_block if pre_block.nodes[n].get('atomname', n)[0] in '+-']) + for node in block: + block.nodes[node]['resname'] = block.name # Atoms from a links are matched against a molecule based on its node # attributes. The name is a primary criterion, but other criteria can be @@ -419,39 +377,48 @@ def _split_block_and_link(pre_block): # RTP files convey the order by prefixing the names with + or -. We need to # get rid of these prefixes. order = {'+': +1, '-': -1} - relabel_mapping = {} - for idx, node in enumerate(link.nodes()): - atomname = node - if node[0] in '+-': - link.nodes[node]['order'] = order[node[0]] - atomname = atomname[1:] - else: - link.nodes[node]['order'] = 0 - link.nodes[node]['resname'] = block.name - link.nodes[node]['atomname'] = atomname - relabel_mapping[node] = idx - nx.relabel_nodes(link, relabel_mapping, copy=False) - - # By relabelling the nodes, we lost the relations between interactions and - # nodes, so we need to relabel the atoms in the interactions - new_interactions = collections.defaultdict(list) - for name, interactions in link.interactions.items(): - for interaction in interactions: - atoms = tuple(relabel_mapping[atom] for atom in interaction.atoms) - new_interactions[name].append(Interaction( - atoms=atoms, - parameters=interaction.parameters, - meta=interaction.meta - )) - link.interactions = new_interactions + for link in links: + relabel_mapping = {} + # (Deep)copy interactions, since relabel_nodes removes/adds nodes, which + # wipes out all interactions otherwise. + interactions = copy.deepcopy(link.interactions) + remove_attrs = 'atype charge charge_group'.split() + for idx, node in enumerate(link.nodes()): + atomname = node + if node[0] in '+-': + link.nodes[node]['order'] = order[node[0]] + atomname = atomname[1:] + else: + link.nodes[node]['order'] = 0 + link.nodes[node]['resname'] = block.name + for attr in remove_attrs: + if attr in link.nodes[node]: + del link.nodes[node][attr] + link.nodes[node]['atomname'] = atomname + relabel_mapping[node] = idx + nx.relabel_nodes(link, relabel_mapping, copy=False) + + # By relabelling the nodes, we lost the relations between interactions and + # nodes, so we need to relabel the atoms in the interactions + new_interactions = collections.defaultdict(list) + for name, interactions in interactions.items(): + for interaction in interactions: + atoms = tuple(relabel_mapping[atom] for atom in interaction.atoms) + if not any(link.nodes[node]['order'] for node in atoms): + continue + new_interactions[name].append(Interaction( + atoms=atoms, + parameters=interaction.parameters, + meta=interaction.meta + )) + link.interactions = new_interactions + link.interactions = dict(link.interactions) # Revert the interactions back to regular dicts to avoid creating # keys when querying them. block.interactions = dict(block.interactions) - link.interactions = dict(link.interactions) - - return block, link + return block, links def _clean_lines(lines): @@ -462,11 +429,6 @@ def _clean_lines(lines): yield splitted[0] -def _dihedral_sorted_center(atoms): - #return sorted(atoms[1:-1]) - return atoms[1:-1] - - def read_rtp(lines, force_field): """ Read blocks and links from a Gromacs RTP file to populate a force field @@ -522,3 +484,4 @@ def read_rtp(lines, force_field): force_field.blocks.update(blocks) force_field.links.extend(links) + force_field.variables['bondedtypes'] = bondedtypes diff --git a/vermouth/map_input.py b/vermouth/map_input.py index d87f66d62..23a02452d 100644 --- a/vermouth/map_input.py +++ b/vermouth/map_input.py @@ -441,25 +441,23 @@ def read_mapping_directory(directory, force_fields): return dict(mappings) -def generate_self_mappings(blocks): +def generate_self_mappings(force_field): """ - Generate self mappings from a collection of blocks. + Generate self mappings from a modification for the blocks and modifications. A self mapping is a mapping that maps a force field to itself. Applying such mapping is applying a neutral transformation. Parameters ---------- - blocks: dict[str, networkx.Graph] - A dictionary of blocks with block names as keys and the blocks - themselves as values. The blocks must be instances of :class:`networkx.Graph` - with each node having an 'atomname' attribute. + force_field: vermouth.forcefield.ForceField + A force field containing Blocks and Modifications Returns ------- - mappings: dict[str, tuple] - A dictionary of mappings where the keys are the names of the blocks, - and the values are tuples like (mapping, weights, extra). + mappings: dict[~vermouth.map_parser.Mapping] + A dictionary of mappings where the keys are the names of the + blocks/modifications, and the values are Mappings. Raises ------ @@ -474,11 +472,18 @@ def generate_self_mappings(blocks): Generate self mappings for a list of force fields. """ mappings = {} - for name, block in blocks.items(): + for name, block in force_field.blocks.items(): mapping = Mapping(block, block, {idx: {idx: 1} for idx in block.nodes}, - {}, ff_from=block.force_field, ff_to=block.force_field, + {}, ff_from=force_field, ff_to=force_field, extra=[], type='block', names=(name,)) mappings[name] = mapping + for name, mod in force_field.modifications.items(): + for n in mod: + mod.nodes[n]['modifications'] = mod.nodes[n].get('modifications', []) + [mod] + mapping = Mapping(mod, mod, {idx: {idx: 1} for idx in mod.nodes}, + {}, ff_from=force_field, ff_to=force_field, + extra=[], type='modification', names=(name,)) + mappings[name] = mapping return mappings @@ -500,7 +505,7 @@ def generate_all_self_mappings(force_fields): mappings = collections.defaultdict(dict) for force_field in force_fields: name = force_field.name - mappings[name][name] = generate_self_mappings(force_field.blocks) + mappings[name][name] = generate_self_mappings(force_field) return dict(mappings) diff --git a/vermouth/map_parser.py b/vermouth/map_parser.py index 95460372e..6fe7ab0db 100644 --- a/vermouth/map_parser.py +++ b/vermouth/map_parser.py @@ -407,6 +407,7 @@ def add_mapping(self, attrs_from, attrs_to, weight): weight: float The weight associated with this partial mapping. """ + nodes_from = list(self.blocks_from.find_atoms(**attrs_from)) nodes_to = list(self.blocks_to.find_atoms(**attrs_to)) assert len(nodes_from) == len(nodes_to) == 1 diff --git a/vermouth/molecule.py b/vermouth/molecule.py index 7d4d151ab..a9e0e091c 100644 --- a/vermouth/molecule.py +++ b/vermouth/molecule.py @@ -17,6 +17,7 @@ import copy from functools import partial import itertools +import re import networkx as nx import numpy as np @@ -106,6 +107,23 @@ def match(self, node, key): return node.get(key) in self.value +class Regex(LinkPredicate): + """ + Test if an attribute matches the provided regular expression. + + Parameters + ---------- + value: re.Pattern + The regular expression + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.value = re.compile(self.value) + + def match(self, node, key): + return self.value.fullmatch(node.get(key, '')) is not None + + class NotDefinedOrNot(LinkPredicate): """ Test if an attribute is not the reference value. @@ -351,6 +369,18 @@ class Molecule(nx.Graph): node_dict_factory = OrderedDict def __init__(self, *args, **kwargs): + if args and isinstance(args[0], Molecule): + super().__init__(*args, **kwargs) + other = args[0] + self.meta = copy.deepcopy(other.meta) + self._force_field = other._force_field + self.nrexcl = other.nrexcl + self.interactions = copy.deepcopy(other.interactions) + self.citations = copy.deepcopy(other.citations) + self.log_entries = copy.deepcopy(other.log_entries) + self.max_node = other.max_node + self.box = other.box + return self.meta = kwargs.pop('meta', {}) self._force_field = kwargs.pop('force_field', None) self.nrexcl = kwargs.pop('nrexcl', None) @@ -742,7 +772,6 @@ def merge_molecule(self, molecule): # Renumber any existing formatting maps fmt_args = [{name: correspondence[old] for (name, old) in fmt_arg.items()} for fmt_arg in fmt_args] self.log_entries[loglevel][entry] += fmt_args + [correspondence] - return correspondence def share_moltype_with(self, other): diff --git a/vermouth/processors/__init__.py b/vermouth/processors/__init__.py index f5a5d8aca..ecc243510 100644 --- a/vermouth/processors/__init__.py +++ b/vermouth/processors/__init__.py @@ -45,3 +45,4 @@ from .annotate_mut_mod import AnnotateMutMod from .water_bias import ComputeWaterBias from .annotate_idrs import AnnotateIDRs +from .rtp_polisher import RTPPolisher diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py index 38b25fd18..4d59c15d0 100644 --- a/vermouth/processors/do_mapping.py +++ b/vermouth/processors/do_mapping.py @@ -18,6 +18,7 @@ molecule. """ from collections import defaultdict +from copy import deepcopy from itertools import product, combinations import networkx as nx @@ -274,14 +275,16 @@ def modification_matches(molecule, mappings): LOGGER.warning("Can't find modification mappings for the " "modifications {}. The following modification " "mappings are known: {}", - list(group), known_mod_mappings, + list(group), sorted(known_mod_mappings.keys()), type='unmapped-atom') continue needed_mod_mappings.update(covered_by) matches = [] # Sort on the tuple[str] type names of the mappings so that mappings that # define most modifications at the same time get processed first - for mod_name in sorted(needed_mod_mappings, key=len, reverse=True): + for mod_name in sorted(needed_mod_mappings, + key=lambda ms: (len(ms), len(known_mod_mappings[ms].block_from.nodes)), + reverse=True): mod_mapping = known_mod_mappings[mod_name] for mol_to_mod, modification, references in mod_mapping.map(molecule, node_match=ptm_resname_match): matches.append((mol_to_mod, modification, references)) @@ -414,6 +417,8 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): mod_to_out = {} # Some nodes of modification will already exist. The question is # which, and which index they have in graph_out. + # We'll store these anchors for now + anchors = set() for mod_idx in modification: if not node_should_exist(modification, mod_idx): # Node does not exist yet. @@ -423,6 +428,7 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): out_idx = max(graph_out) + 1 mod_to_out[mod_idx] = out_idx graph_out.add_node(out_idx, **modification.nodes[mod_idx]) + graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) else: # Node should already exist # We need to find the out_index of this node. Since the @@ -445,12 +451,20 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): raise ValueError("No node found in molecule with " "atomname {}".format(modification.nodes[mod_idx]['atomname'])) # Undefined loop variable is guarded against by the else-raise above + anchors.add(mod_idx) mod_to_out[mod_idx] = out_idx # pylint: disable=undefined-loop-variable graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) # pylint: disable=undefined-loop-variable graph_out.nodes[out_idx]['modifications'] = graph_out.nodes[out_idx].get('modifications', []) if modification not in graph_out.nodes[out_idx]['modifications']: graph_out.nodes[out_idx]['modifications'].append(modification) + # FIXME Jank here to ensure the charge_group attributes look reasonable + charge_group_start = max(graph_out.nodes[mod_to_out[idx]].get('charge_group', 1) for idx in anchors) if anchors else 1 + for charge_group, mod_idx in enumerate(modification, charge_group_start): + out_idx = mod_to_out[mod_idx] + if 'charge_group' not in graph_out.nodes[out_idx]: + graph_out.nodes[out_idx]['charge_group'] = charge_group + for mol_idx in mol_to_mod: for mod_idx, weight in mol_to_mod[mol_idx].items(): out_idx = mod_to_out[mod_idx] @@ -541,8 +555,8 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), `molecule`. attribute_stash: tuple[str] The attributes that will always be transferred from the input molecule - to the produced graph, but prefixed with _old_.Thus they are new attributes - and are not conflicting with already defined attributes. + to the produced graph, but prefixed with _old_. Thus, they are new + attributes and are not conflicting with already defined attributes. Returns @@ -632,48 +646,45 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), to_remove = set() for out_idx in out_to_mol: mol_idxs = out_to_mol[out_idx].keys() - # Keep track of what bead comes from where + + # Time to collect the attributes to set. Start by grabbing all the + # attributes we already have. + node_attrs = deepcopy(graph_out.nodes[out_idx]) + + # In all cases, keep track of what bead comes from where subgraph = molecule.subgraph(mol_idxs) - graph_out.nodes[out_idx]['graph'] = subgraph + node_attrs['graph'] = subgraph weights = out_to_mol[out_idx] - graph_out.nodes[out_idx]['mapping_weights'] = weights + node_attrs['mapping_weights'] = weights + # Now we find attribute values from molecule. if out_idx in all_references: ref_idx = all_references[out_idx] - new_attrs = attrs_from_node(molecule.nodes[ref_idx], - attribute_keep+attribute_must+attribute_stash) - for attr, val in new_attrs.items(): - # Attrs in attribute_keep we always transfer, those in - # attribute_must we transfer only if they're not already in the - # created node - if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: - graph_out.nodes[out_idx].update(new_attrs) + mol_attrs = attrs_from_node(molecule.nodes[ref_idx], attribute_keep + attribute_stash) + for attr, val in mol_attrs.items(): + # This is if/if rather than if/elif on purpose. It could be that + # an attribute needs to be both stashed and kept if attr in attribute_stash: - graph_out.nodes[out_idx]["_old_"+attr] = val + node_attrs["_old_" + attr] = val + if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: + node_attrs[attr] = val else: attrs = defaultdict(list) + attrs_not_sane = [] for mol_idx in mol_idxs: - new_attrs = attrs_from_node(molecule.nodes[mol_idx], - attribute_keep+attribute_must+attribute_stash) - for attr, val in new_attrs.items(): + mol_attrs = attrs_from_node(molecule.nodes[mol_idx], attribute_keep + attribute_stash) + for attr, val in mol_attrs.items(): attrs[attr].append(val) - attrs_not_sane = [] - for attr, vals in attrs.items(): - if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: - if vals: - graph_out.nodes[out_idx][attr] = vals[0] - else: - # No nodes hat the attribute. - graph_out.nodes[out_idx][attr] = None - if attr in attribute_stash: - if vals: - graph_out.nodes[out_idx]["_old_"+attr] = vals[0] - else: - # No nodes hat the attribute. - graph_out.nodes[out_idx]["_old_"+attr] = None + for attr, vals in attrs.items(): if not are_all_equal(vals): attrs_not_sane.append(attr) + # This is if/if rather than if/elif on purpose. It could be that + # an attribute needs to be both stashed and kept + if attr in attribute_stash: + node_attrs["_old_" + attr] = vals[0] + if attr in attribute_keep or attr not in graph_out.nodes[out_idx]: + node_attrs[attr] = vals[0] if attrs_not_sane: LOGGER.warning('The attributes {} for atom {} are going to' @@ -682,6 +693,7 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), attrs_not_sane, format_atom_string(graph_out.nodes[out_idx]), type='inconsistent-data') + graph_out.nodes[out_idx].update(node_attrs) if graph_out.nodes[out_idx].get('atomname', '') is None: to_remove.add(out_idx) @@ -757,6 +769,34 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), for idx in other_uncovered], type='unmapped-atom') + # "None to one" mapping - Strictly speaking this cannot happen, but it could + # be that output particles map to only particles that were not present in + # the input structure. This probably causes massive issues downstream. + # For now, flag a sane warning, and at the same time transfer missing "must" + # attributes from nearby atoms. + for node in graph_out: + for attr in attribute_must: + if attr not in graph_out.nodes[node]: + # We can skip `node`, since if it had the attribute required we + # wouldn't be in this mess to begin with. + # Set a depth_limit to keep the runtime within bounds. + close_nodes = (v for (u, v) in nx.bfs_edges(graph_out, source=node, depth_limit=3)) + for template_node in close_nodes: + if attr in graph_out.nodes[template_node]: + graph_out.nodes[node][attr] = deepcopy(graph_out.nodes[template_node][attr]) + LOGGER.warning("Atom {} did not have a {}, so we took " + "it from atom {} instead.", + format_atom_string(graph_out.nodes[node]), + attr, + format_atom_string(graph_out.nodes[template_node]), + type="inconsistent-data") + break + else: # no break, attribute not found + LOGGER.warning("Atom {} does not have a {}, and we couldn't " + "find a nearby atom that does.", + format_atom_string(graph_out.nodes[node]), + attr, type="inconsistent-data") + for interaction_type in modified_interactions: for atoms, modifications in modified_interactions[interaction_type].items(): if len(modifications) != 1: diff --git a/vermouth/processors/rtp_polisher.py b/vermouth/processors/rtp_polisher.py new file mode 100644 index 000000000..6ede2c6bc --- /dev/null +++ b/vermouth/processors/rtp_polisher.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright 2018 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import defaultdict +import networkx as nx + +from ..molecule import Interaction +from .processor import Processor +from ..utils import first_alpha + + +def _generate_paths(graph, length): + if length < 1: + return + elif length == 1: + yield from graph.edges + yield from (e[::-1] for e in graph.edges) + return + for p in _generate_paths(graph, length-1): + for neighbour in set(graph[p[-1]]) - set(p): # Or should this be `graph[p[-1]] - p[-2]`? + yield p + (neighbour,) + return + + +def _keep_dihedral(dihedral, known_dihedral_centers, known_improper_centers, all_dihedrals, remove_dihedrals): + # https://manual.gromacs.org/current/reference-manual/file-formats.html#rtp + # gmx pdb2gmx automatically generates one proper dihedral for every + # rotatable bond, preferably on heavy atoms. When the [dihedrals] field is + # used, no other dihedrals will be generated for the bonds corresponding to + # the specified dihedrals. It is possible to put more than one dihedral on + # a rotatable bond. + # Column 5 : This controls the generation of dihedrals from the bonding. + # All possible dihedrals are generated automatically. A value of + # 1 here means that all these are retained. A value of + # 0 here requires generated dihedrals be removed if + # * there are any dihedrals on the same central atoms + # specified in the residue topology, or + # * there are other identical generated dihedrals + # sharing the same central atoms, or + # * there are other generated dihedrals sharing the + # same central bond that have fewer hydrogen atoms + # Column 8: Remove proper dihedrals if centered on the same bond + # as an improper dihedral + # https://gitlab.com/gromacs/gromacs/-/blob/main/src/gromacs/gmxpreprocess/gen_ad.cpp#L249 + center = tuple(dihedral[1:3]) + if (not all_dihedrals) and (center in known_dihedral_centers or center[::-1] in known_dihedral_centers): + return False + if remove_dihedrals and center in known_improper_centers: + return False + return True + + +def add_angles(mol, *, bond_type=5): + known_angles = {tuple(i.atoms) for i in mol.get_interaction("angles")} + for p in _generate_paths(mol, 2): + if p not in known_angles and p[::-1] not in known_angles: + mol.add_interaction('angles', atoms=p, parameters=[bond_type], meta={'comment': 'implicit angle'}) + known_angles.add(p) + + +def add_dihedrals_and_pairs(mol, *, bond_type=2, all_dihedrals=True, + HH14_pair=False, remove_dihedrals=True): + # Generate missing dihedrals and pair interactions + # As pdb2gmx generates all the possible dihedral angles by default, + # RTP files are written assuming they will be generated. A RTP file + # have some control over these dihedral angles through the bondedtypes + # section. + + explicit_dihedral_centers = {frozenset(dih.atoms[1:3]) for dih in mol.interactions.get('dihedrals', [])} + implicit_dihedrals = defaultdict(set) + explicit_improper_centers = {frozenset(dih.atoms[1:3]) for dih in mol.interactions.get('impropers', [])} + distances = dict(nx.all_pairs_shortest_path_length(mol, cutoff=3)) + + known_pairs = {frozenset(inter.atoms) for inter in mol.interactions.get('pairs', [])} + hydrogens = {n for n in mol if mol.nodes[n].get('element', first_alpha(mol.nodes[n]['atomname'])) == 'H'} + for path in _generate_paths(mol, 3): + center = frozenset(path[1:3]) + # https://manual.gromacs.org/current/reference-manual/file-formats.html#rtp + # gmx pdb2gmx automatically generates one proper dihedral for every + # rotatable bond, preferably on heavy atoms. When the [dihedrals] field is + # used, no other dihedrals will be generated for the bonds corresponding to + # the specified dihedrals. It is possible to put more than one dihedral on + # a rotatable bond. + # Column 5 : This controls the generation of dihedrals from the bonding. + # All possible dihedrals are generated automatically. A value of + # 1 here means that all these are retained. A value of + # 0 here requires generated dihedrals be removed if + # * there are any dihedrals on the same central atoms + # specified in the residue topology, or + # * there are other identical generated dihedrals + # sharing the same central atoms, or + # * there are other generated dihedrals sharing the + # same central bond that have fewer hydrogen atoms + # Column 8: Remove proper dihedrals if centered on the same bond + # as an improper dihedral + # https://gitlab.com/gromacs/gromacs/-/blob/main/src/gromacs/gmxpreprocess/gen_ad.cpp#L249 + if (not (center not in explicit_dihedral_centers and remove_dihedrals and center in explicit_improper_centers) + and path not in implicit_dihedrals[center] and path[::-1] not in implicit_dihedrals[center]): + implicit_dihedrals[center].add(path) + + pair = frozenset({path[0], path[-1]}) + if (HH14_pair or not (pair <= hydrogens)) and pair not in known_pairs and distances[path[0]][path[-1]] == 3: + # Pair interactions are generated for all pairs of atoms which are separated + # by 3 bonds (except pairs of hydrogens). + # TODO: correct for specified exclusions + mol.add_interaction('pairs', atoms=tuple(sorted(pair)), parameters=[1]) + known_pairs.add(pair) + + for center in implicit_dihedrals: + if all_dihedrals: + # Just add everything + # TODO: Also sort the dihedrals by index. + # See src/gromacs/gmxpreprocess/gen_add.cpp::dcomp in the + # Gromacs source code (see version 2016.3 for instance). + + dihedrals = [Interaction(atoms=p, parameters=[bond_type], meta={'comment': 'implicit dihedral'}) + for p in implicit_dihedrals[center]] + mol.interactions['dihedrals'] = mol.interactions.get('dihedrals', []) + dihedrals + else: + # Find the dihedral with the least amount of hydrogens + best_dihedral = min(implicit_dihedrals[center], + key=lambda p: sum(mol.nodes[idx].get('atomname', '') == 'H' for idx in p)) + mol.add_interaction('dihedrals', atoms=best_dihedral, parameters=[bond_type], meta={'comment': 'implicit dihedral'}) + + +class RTPPolisher(Processor): + def run_molecule(self, molecule): + bondedtypes = molecule.force_field.variables['bondedtypes'] + # bond_type = bondedtypes.bonds + angle_type = bondedtypes.angles + dihedral_type = bondedtypes.dihedrals + # improper_type = bondedtypes.impropers + all_dihedrals = bondedtypes.all_dihedrals + HH14_pair = bondedtypes.HH14 + remove_dihedrals = bondedtypes.remove_dih + add_angles(molecule, bond_type=angle_type) + add_dihedrals_and_pairs(molecule, bond_type=dihedral_type, all_dihedrals=all_dihedrals, HH14_pair=HH14_pair, remove_dihedrals=remove_dihedrals) + return molecule diff --git a/vermouth/rcsu/contact_map.py b/vermouth/rcsu/contact_map.py index 111f0c94f..328a2659e 100644 --- a/vermouth/rcsu/contact_map.py +++ b/vermouth/rcsu/contact_map.py @@ -40,7 +40,7 @@ def read_go_map(file_path): if len(tokens) == 0: continue - if tokens[0] == "R" and len(tokens) == 18: + if tokens[0] == "R" and len(tokens) >= 18: # this is a bad place to filter but follows # the old script if tokens[11] == "1" or (tokens[11] == "0" and tokens[14] == "1"): diff --git a/vermouth/tests/data/force_fields/universal-test/modifications.ff b/vermouth/tests/data/force_fields/universal-test/modifications.ff index ae2fe5414..cd69e7c0f 100644 --- a/vermouth/tests/data/force_fields/universal-test/modifications.ff +++ b/vermouth/tests/data/force_fields/universal-test/modifications.ff @@ -73,9 +73,12 @@ NE1 {"resname": "HIS", "element": "N"} HE1 {"resname": "HIS", "element": "H", "PTM_atom": true} ND1 {"resname": "HIS", "element": "N"} HD1 {"resname": "HIS", "element": "H", "PTM_atom": true} +CE1 {"resname": "HIS", "element": "C"} [ edges ] NE1 HE1 ND1 HD1 +CE1 NE1 +CE1 ND1 [ modification ] ASP0 diff --git a/vermouth/tests/test_do_mapping.py b/vermouth/tests/test_do_mapping.py index 3916af84e..b9dcc084f 100644 --- a/vermouth/tests/test_do_mapping.py +++ b/vermouth/tests/test_do_mapping.py @@ -475,7 +475,7 @@ def test_apply_mod_mapping(modified_molecule, modifications): """ graph_out = Molecule(force_field=FF_UNIVERSAL) graph_out.add_nodes_from([ - (0, {'atomname': 'A', 'resid': 1}) + (0, {'atomname': 'A', 'resid': 1, 'charge_group': 1}) ]) mol_to_out = {0: {0: 1}} out_to_mol = {0: {0: 1}} @@ -492,7 +492,7 @@ def test_apply_mod_mapping(modified_molecule, modifications): assert mol_to_out == {0: {0: 1}, 1: {1: 1}} assert out_to_mol == {0: {0: 1}, 1: {1: 1}} - graph_out.add_node(2, atomname='J', resid=2) + graph_out.add_node(2, atomname='J', resid=2, charge_group=2) mol_to_out[16] = {2: 1} out_to_mol[2] = {16: 1} diff --git a/vermouth/tests/test_map_input.py b/vermouth/tests/test_map_input.py index c21847450..5fe50fab4 100644 --- a/vermouth/tests/test_map_input.py +++ b/vermouth/tests/test_map_input.py @@ -522,15 +522,18 @@ def test_generate_self_mapping(): Test that :func:`vermouth.map_input.generate_self_mappings` works as expected. """ + force_field = vermouth.forcefield.ForceField(name='ff') # Build the input blocks blocks = { 'A0': vermouth.molecule.Block([['AA', 'BBB'], ['BBB', 'CCCC']]), 'B1': vermouth.molecule.Block([['BBB', 'CCCC'], ['BBB', 'E']]), } + force_field.blocks = blocks for name, block in blocks.items(): block.name = name for atomname, node in block.nodes.items(): node['atomname'] = atomname + # Build the expected output ref_mappings = { 'A0': ( @@ -547,10 +550,13 @@ def test_generate_self_mapping(): ), } # Actually test - mappings = vermouth.map_input.generate_self_mappings(blocks) + mappings = vermouth.map_input.generate_self_mappings(force_field) + for name, map in mappings.items(): + ref = ref_mappings[name] + assert map.mapping == ref[0] + assert map.block_to.extra == ref[1] + assert mappings.keys() == ref_mappings.keys() - for blockname in mappings: - assert mappings[blockname].mapping, mappings[blockname].extras == ref_mappings[blockname] def test_generate_all_self_mappings():