Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rtp parser - Abandon all hope, ye who review here #631

Open
wants to merge 29 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2bbba7b
Autogenerate all implicit angles
pckroon Sep 5, 2024
09425a3
Autogenerate modification auto-mappings
pckroon Jun 24, 2024
f9dab11
Fix docstring formatting
pckroon Jun 24, 2024
36132b5
Fix modification auto-mapping names
pckroon Jun 26, 2024
1a87fd2
Improve debug output of write_itp
pckroon Jun 26, 2024
8244193
Improve debug output of known modification mappings
pckroon Jun 26, 2024
68e5b80
Make node attribute propagation a bit more sane and make sure all nod…
pckroon Jun 26, 2024
0de4491
Find missing required node attributes at nearby nodes if needed
pckroon Jun 28, 2024
0a41eb8
Tweak attribute propagation for tests
pckroon Jun 28, 2024
b4b942e
Make sure all modification nodes get their charge_group
pckroon Jun 28, 2024
ca9b9a8
Make modification charge_groups slightly better
pckroon Jun 28, 2024
f77d3ef
Unbreak ffinput test
pckroon Jun 28, 2024
82fac8f
add charge groups to test_apply_mod_mapping nodes
pckroon Jun 28, 2024
f8f9764
Make PTM atoms also apply their 'replace' attribute
pckroon Jul 5, 2024
602e8b5
Sort (atoms in) interactions in itp files, attempt to generate implic…
pckroon Sep 10, 2024
521d529
Make it possible to deduplicate symmetric links
pckroon Sep 10, 2024
72a0b88
More interaction sorting in ITP files, try to teach links to deal wit…
pckroon Nov 4, 2024
5dde7dc
Cleanup trying to make links asymmetric
pckroon Nov 13, 2024
e46b4eb
Cleanup some debug statements
pckroon Nov 13, 2024
80966bc
Cleanup bin/martinize2 imports a bit
pckroon Nov 13, 2024
9b1014f
Move generation of implicit RTP interactions to separate processor
pckroon Nov 13, 2024
cbb71bd
Don't sort atom order in impropers
pckroon Nov 13, 2024
2dfece1
Merge branch 'master' into rtp_parser
pckroon Nov 14, 2024
fecda76
Merge branch 'master' into rtp_parser
pckroon Nov 25, 2024
a6a0f7b
Split up rtp links to interaction sized bites
pckroon Nov 25, 2024
cd98aef
Fix matching modifications mappings restricted by found modifications
pckroon Dec 10, 2024
e4fe2f3
Apply modifications mappings from large to small
pckroon Dec 10, 2024
060dca7
Undo mod map by mod name, annotate self mod mappings with the modific…
pckroon Dec 11, 2024
50f0916
drive-by fix of test ff
pckroon Dec 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions bin/martinize2
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def martinize(system, mappings, to_ff, delete_unknown=False, idrs=False, disorde
mappings=mappings,
to_ff=to_ff,
delete_unknown=delete_unknown,
attribute_keep=("cgsecstruct", "chain"),
attribute_must=("resname",),
attribute_keep=("cgsecstruct", "resname", "chain"),
attribute_must=("resname", "atype"),
attribute_stash=("resid",),
).run_system(system)
LOGGER.info("Averaging the coordinates.", type="step")
Expand Down Expand Up @@ -976,6 +976,9 @@ def entry():
idrs=args.idr_tune,
disordered_regions=args.water_idrs
)
if 'bondedtypes' in known_force_fields[args.to_ff].variables:
LOGGER.info("Generating implicit interactions for RTP force field", type='step')
vermouth.RTPPolisher().run_system(system)

# Apply position restraints if required.
if args.posres != "none":
Expand Down
2 changes: 1 addition & 1 deletion vermouth/forcefield.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def __init__(self, directory=None, name=None):
self.name = None
self.citations = {}
if directory is not None:
self.read_from(directory)
self.name = os.path.basename(str(directory))
self.read_from(directory)
if name is not None:
self.name = name
if self.name is None:
Expand Down
42 changes: 35 additions & 7 deletions vermouth/gmx/itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

import copy
import itertools
from collections import defaultdict
from ..log_helpers import StyleAdapter, get_logger

LOGGER = StyleAdapter(get_logger(__name__))

__all__ = ['write_molecule_itp', ]

Expand Down Expand Up @@ -55,6 +59,21 @@
return (conditional, group)


def _sort_atoms(atoms, name):
if name[:4] in ('bond', 'pair'):
return sorted(atoms)

Check warning on line 64 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L64

Added line #L64 was not covered by tests
elif name.startswith('angle'):
return atoms if atoms[0] < atoms[-1] else list(reversed(atoms))

Check warning on line 66 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L66

Added line #L66 was not covered by tests
elif name.startswith('dihedral'):
return atoms if atoms[1] < atoms[2] else list(reversed(atoms))

Check warning on line 68 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L68

Added line #L68 was not covered by tests
return atoms

def _sort_interaction(atoms, name):
if name.startswith('angle'):
return (atoms[1], tuple(atoms))

Check warning on line 73 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L73

Added line #L73 was not covered by tests
else: return (min(atoms), tuple(atoms))


def write_molecule_itp(molecule, outfile, header=(), moltype=None,
post_section_lines=None, pre_section_lines=None):
"""
Expand Down Expand Up @@ -111,10 +130,16 @@
# Make sure the molecule contains the information required to write the
# [atoms] section. The charge and mass can be ommited, if so gromacs take
# them from the [atomtypes] section of the ITP file.
atoms_with_issues = defaultdict(list)
for attribute in ('atype', 'resid', 'resname', 'atomname',
'charge_group'):
if not all([attribute in atom for _, atom in molecule.atoms]):
raise ValueError('Not all atom have a {}.'.format(attribute))
for _, atom in molecule.atoms:
if attribute not in atom:
atoms_with_issues[attribute].append(atom)

Check warning on line 138 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L138

Added line #L138 was not covered by tests
if atoms_with_issues:
for attr, atoms in atoms_with_issues.items():
LOGGER.error('The following atoms do not have a {}: {}', attr, atoms)
raise ValueError("Some atoms are missing required attributes.")

Check warning on line 142 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L141-L142

Added lines #L141 - L142 were not covered by tests

# Get the maximum length of each atom field so we can align the fields.
# Atom indexes are written as a consecutive series starting from 1.
Expand Down Expand Up @@ -193,10 +218,12 @@
# object to distinguish them from the proper dihedrals. Yet, they
# should be written under the [ dihedrals ] section of the ITP file.
if name == 'impropers':
name = 'dihedrals'
outfile.write('[ {} ]\n'.format(name))
section_name = 'dihedrals'

Check warning on line 221 in vermouth/gmx/itp.py

View check run for this annotation

Codecov / codecov/patch

vermouth/gmx/itp.py#L221

Added line #L221 was not covered by tests
else:
section_name = name
outfile.write('[ {} ]\n'.format(section_name))
seen_sections.add(name)
for line in pre_section_lines.get(name, []):
for line in pre_section_lines.get(section_name, []):
outfile.write(line + '\n')
interactions_group_sorted = sorted(
interactions,
Expand All @@ -212,11 +239,12 @@
outfile.write('{} {}\n'.format(conditional_key, conditional[0]))
if group:
outfile.write('; {}\n'.format(group))
interactions_in_group = sorted(interactions_in_group, key=lambda i: _sort_interaction(_sort_atoms(list(map(correspondence.get, i.atoms)), name), name))
for interaction in interactions_in_group:
atoms = ['{atom_idx:>{max_length[idx]}}'
.format(atom_idx=correspondence[x],
.format(atom_idx=x,
max_length=max_length)
for x in interaction.atoms]
for x in _sort_atoms(list(map(correspondence.get, interaction.atoms)), name)]
parameters = ' '.join(str(x) for x in interaction.parameters)
comment = ''
if 'comment' in interaction.meta:
Expand Down
191 changes: 77 additions & 114 deletions vermouth/gmx/rtp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
"""

import collections
import itertools
import copy
import networkx as nx

from ..molecule import Block, Link, Interaction
from .. import utils

from ..log_helpers import StyleAdapter, get_logger
LOGGER = StyleAdapter(get_logger(__name__))

__all__ = ['read_rtp']

Expand Down Expand Up @@ -192,6 +194,7 @@ def wrapped(subsection, block):
interactions.append(Interaction(atoms=atoms,
parameters=parameters,
meta={}))
block.add_nodes_from(atoms)
block.interactions[interaction_name] = interactions
return wrapped

Expand All @@ -200,6 +203,7 @@ def _parse_bondedtypes(section):
# Default taken from
# 'src/gromacs/gmxpreprocess/resall.cpp::read_resall' in the Gromacs
# source code.

defaults = _BondedTypes(bonds=1, angles=1, dihedrals=1,
impropers=1, all_dihedrals=0,
nrexcl=3, HH14=1, remove_dih=1)
Expand Down Expand Up @@ -233,18 +237,6 @@ def _parse_bondedtypes(section):
return bondedtypes


def _count_hydrogens(names):
return len([name for name in names if utils.first_alpha(name) == 'H'])


def _keep_dihedral(center, block, bondedtypes):
if (not bondedtypes.all_dihedrals) and block.has_dihedral_around(center):
return False
if bondedtypes.remove_dih and block.has_improper_around(center):
return False
return True


def _complete_block(block, bondedtypes):
"""
Add information from the bondedtypes section to a block.
Expand All @@ -254,28 +246,6 @@ def _complete_block(block, bondedtypes):
"""
block.make_edges_from_interactions()

# Generate missing dihedrals
# As pdb2gmx generates all the possible dihedral angles by default,
# RTP files are written assuming they will be generated. A RTP file
# have some control over these dihedral angles through the bondedtypes
# section.
all_dihedrals = []
for center, dihedrals in itertools.groupby(
sorted(block.guess_dihedrals(), key=_dihedral_sorted_center),
_dihedral_sorted_center):
if _keep_dihedral(center, block, bondedtypes):
# TODO: Also sort the dihedrals by index.
# See src/gromacs/gmxpreprocess/gen_add.cpp::dcomp in the
# Gromacs source code (see version 2016.3 for instance).
atoms = sorted(dihedrals, key=_count_hydrogens)[0]
all_dihedrals.append(Interaction(atoms=atoms, parameters=[], meta={}))
# TODO: Sort the dihedrals by index
block.interactions['dihedrals'] = (
block.interactions.get('dihedrals', []) + all_dihedrals
)

# TODO: generate 1-4 interactions between pairs of hydrogen atoms

# Add function types to the interaction parameters. This is done as a
# post processing step to cluster as much interaction specific code
# into this method.
Expand All @@ -292,9 +262,9 @@ def _complete_block(block, bondedtypes):
'exclusions': 1,
'cmap': 1,
}
for name, interactions in block.interactions.items():
for interaction in interactions:
interaction.parameters.insert(0, functypes[name])
for functype in functypes:
for interaction in block.interactions[functype]:
interaction.parameters.insert(0, functypes[functype])

# Set the nrexcl to the block.
block.nrexcl = bondedtypes.nrexcl
Expand Down Expand Up @@ -325,12 +295,15 @@ def _split_blocks_and_links(pre_blocks):
Split an individual pre-block into a block and a link.
"""
blocks = {}
links = []
all_links = []
for name, pre_block in pre_blocks.items():
block, link = _split_block_and_link(pre_block)
block, links = _split_block_and_link(pre_block)
blocks[name] = block
links.append(link)
return blocks, links
if links:
for link in links:
if link:
all_links.append(link)
return blocks, all_links


def _split_block_and_link(pre_block):
Expand All @@ -351,7 +324,7 @@ def _split_block_and_link(pre_block):
-------
block: Block
All the intra-residue information.
link: Link
links: List[Link]
All the inter-residues information.
"""
block = Block(force_field=pre_block.force_field)
Expand All @@ -370,44 +343,29 @@ def _split_block_and_link(pre_block):
except AttributeError:
pass

# Filter the particles from neighboring residues out of the block.
for atom in pre_block.atoms:
if not atom['atomname'].startswith('+-'):
atom['resname'] = pre_block.name
block.add_atom(atom)
link.add_node(atom['atomname'])

# Create the edges of the link and block based on the edges in the pre-block.
# This will create too many edges in the link, but the useless ones will be
# pruned latter.
link.add_edges_from(pre_block.edges)
block.add_edges_from(edge for edge in pre_block.edges
if not any(node[0] in '+-' for node in edge))

# Split the interactions from the pre-block between the block (for
# intra-residue interactions) and the link (for inter-residues ones).
# The "relevant_atoms" set keeps track of what particles are
# involved in the link. This will allow to prune the link without
# iterating again through its interactions.
relevant_atoms = set()
for name, interactions in pre_block.interactions.items():
links = []

for interaction_type, interactions in pre_block.interactions.items():
for interaction in interactions:
for_link = any(atom[0] in '+-' for atom in interaction.atoms)
if for_link:
link.interactions[name].append(interaction)
relevant_atoms.update(interaction.atoms)
else:
block.interactions[name].append(interaction)

# Prune the link to keep only the edges and particles that are
# relevant.
nodes = set(link.nodes())
link.remove_nodes_from(nodes - relevant_atoms)
# Some interactions do not generate nodes (impropers for instance). If a
# node is only described in one of such interactions, then the node never
# appears in the link. Here we make sure these nodes exists even if they
# are note connected.
link.add_nodes_from(relevant_atoms)
# Atoms that are not defined in the atoms section but only in interactions
# (because they're part of the neighbours), don't have an atomname
atomnames = [pre_block.nodes[n_idx].get('atomname') or n_idx for n_idx in interaction.atoms]
if any(atomname[0] in '+-' for atomname in atomnames):
link = Link(pre_block.subgraph(interaction.atoms))
if not nx.is_connected(link):
LOGGER.info('Discarding {} between atoms {} for link'
' defined in residue {} in force field {}'
' because it is disconnected.',
interaction_type, ' '.join(interaction.atoms),
pre_block.name, pre_block.force_field.name,
type='inconsistent-data')
continue
links.append(link)

block = pre_block
block.remove_nodes_from([n for n in pre_block if pre_block.nodes[n].get('atomname', n)[0] in '+-'])
for node in block:
block.nodes[node]['resname'] = block.name

# Atoms from a links are matched against a molecule based on its node
# attributes. The name is a primary criterion, but other criteria can be
Expand All @@ -419,39 +377,48 @@ def _split_block_and_link(pre_block):
# RTP files convey the order by prefixing the names with + or -. We need to
# get rid of these prefixes.
order = {'+': +1, '-': -1}
relabel_mapping = {}
for idx, node in enumerate(link.nodes()):
atomname = node
if node[0] in '+-':
link.nodes[node]['order'] = order[node[0]]
atomname = atomname[1:]
else:
link.nodes[node]['order'] = 0
link.nodes[node]['resname'] = block.name
link.nodes[node]['atomname'] = atomname
relabel_mapping[node] = idx
nx.relabel_nodes(link, relabel_mapping, copy=False)

# By relabelling the nodes, we lost the relations between interactions and
# nodes, so we need to relabel the atoms in the interactions
new_interactions = collections.defaultdict(list)
for name, interactions in link.interactions.items():
for interaction in interactions:
atoms = tuple(relabel_mapping[atom] for atom in interaction.atoms)
new_interactions[name].append(Interaction(
atoms=atoms,
parameters=interaction.parameters,
meta=interaction.meta
))
link.interactions = new_interactions

for link in links:
relabel_mapping = {}
# (Deep)copy interactions, since relabel_nodes removes/adds nodes, which
# wipes out all interactions otherwise.
interactions = copy.deepcopy(link.interactions)
remove_attrs = 'atype charge charge_group'.split()
for idx, node in enumerate(link.nodes()):
atomname = node
if node[0] in '+-':
link.nodes[node]['order'] = order[node[0]]
atomname = atomname[1:]
else:
link.nodes[node]['order'] = 0
link.nodes[node]['resname'] = block.name
for attr in remove_attrs:
if attr in link.nodes[node]:
del link.nodes[node][attr]
link.nodes[node]['atomname'] = atomname
relabel_mapping[node] = idx
nx.relabel_nodes(link, relabel_mapping, copy=False)

# By relabelling the nodes, we lost the relations between interactions and
# nodes, so we need to relabel the atoms in the interactions
new_interactions = collections.defaultdict(list)
for name, interactions in interactions.items():
for interaction in interactions:
atoms = tuple(relabel_mapping[atom] for atom in interaction.atoms)
if not any(link.nodes[node]['order'] for node in atoms):
continue
new_interactions[name].append(Interaction(
atoms=atoms,
parameters=interaction.parameters,
meta=interaction.meta
))
link.interactions = new_interactions
link.interactions = dict(link.interactions)

# Revert the interactions back to regular dicts to avoid creating
# keys when querying them.
block.interactions = dict(block.interactions)
link.interactions = dict(link.interactions)

return block, link
return block, links


def _clean_lines(lines):
Expand All @@ -462,11 +429,6 @@ def _clean_lines(lines):
yield splitted[0]


def _dihedral_sorted_center(atoms):
#return sorted(atoms[1:-1])
return atoms[1:-1]


def read_rtp(lines, force_field):
"""
Read blocks and links from a Gromacs RTP file to populate a force field
Expand Down Expand Up @@ -522,3 +484,4 @@ def read_rtp(lines, force_field):

force_field.blocks.update(blocks)
force_field.links.extend(links)
force_field.variables['bondedtypes'] = bondedtypes
Loading
Loading