diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..b465c1cb0 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,29 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3" + +formats: + - pdf + +# Build documentation in the docs/ directory with Sphinx +sphinx: + builder: html + fail_on_warning: true + configuration: doc/source/conf.py + +# We recommend specifying your dependencies to enable reproducible builds: +# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - method: pip + path: . + - requirements: requirements-docs.txt \ No newline at end of file diff --git a/README.md b/README.md index 77305b22b..015a6baef 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,11 @@ The documentation of the vermouth python library will come soon. year={2022}} ``` +## Documentation + +More complete documentation, including API documentation can be found at +https://vermouth-martinize.readthedocs.io/ + ## License Martinize2 and vermouth are distributed under the Apache 2.0 license. diff --git a/bin/martinize2 b/bin/martinize2 index fb4d620a5..2dde3aa9a 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -797,22 +797,22 @@ def entry(): if args.list_blocks: print("The following Blocks are known to force field {}:".format(args.from_ff)) - print(", ".join(known_force_fields[args.from_ff].blocks)) + print(", ".join(sorted(known_force_fields[args.from_ff].blocks))) print( "The following Modifications are known to force field {}:".format( args.from_ff ) ) - print(", ".join(known_force_fields[args.from_ff].modifications)) + print(", ".join(sorted(known_force_fields[args.from_ff].modifications))) print() print("The following Blocks are known to force field {}:".format(args.to_ff)) - print(", ".join(known_force_fields[args.to_ff].blocks)) + print(", ".join(sorted(known_force_fields[args.to_ff].blocks))) print( "The following Modifications are known to force field {}:".format( args.to_ff ) ) - print(", ".join(known_force_fields[args.to_ff].modifications)) + print(", ".join(sorted(known_force_fields[args.to_ff].modifications))) parser.exit() if args.elastic and args.govs_includes: diff --git a/doc/source/conf.py b/doc/source/conf.py index 318342a0c..893bb4a55 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -32,8 +32,8 @@ # The full version, including alpha/beta/rc tags release = get_distribution('vermouth').version # The short X.Y version -version = '.'.join(release.split('.')[:2]) - +# version = '.'.join(release.split('.')[:2]) +version = release # -- General configuration --------------------------------------------------- diff --git a/doc/source/data.rst b/doc/source/data.rst index e02d766d1..5eb53aca5 100644 --- a/doc/source/data.rst +++ b/doc/source/data.rst @@ -63,7 +63,7 @@ method. The approach where links only affect the parameters where they depend on the local structure makes it easier to reason about how the final topology is constructed, and the performance is better. -Besides nodes, edges and interactions links also describe non-edges, patterns +Besides nodes, edges, and interactions, links also describe non-edges, patterns and removed interactions. Non-edges and patterns are used when matching the link to a molecule. Where there is a non-edge in the link there cannot be an edge in the molecule, and the atoms involved do not need to be present in the molecule. @@ -83,7 +83,9 @@ atoms/particles that should already be described by the block and atoms that are only described by the modification. A modification can add or remove nodes, change node attributes, and add, change, -or remove interactions; much like a `Link`_. +or remove interactions; much like a `Link`_. Note that a modification *must* always +add at least one node. Otherwise there will be no unidentified nodes to be picked +up by the processor. Modifications can be defined through :ref:`.ff files `. See also: :ref:`Identify modifications `. @@ -101,11 +103,6 @@ Note that this is only a subset of a force field in the MD sense: a VerMoUTH non-bonded parameters (only the particle types are included), or functional forms. -The ``universal`` force field deserves special mention. If not overridden with -the ``-from`` flag this force field is used. This force field does not define -any MD parameters, but this is fine. Instead, this force field defines only atom -names and the associated connections. - Mapping ------- A :class:`~vermouth.map_parser.Mapping` describes how molecular fragments can diff --git a/doc/source/file_formats.rst b/doc/source/file_formats.rst index 3997466e7..73e4b0a6e 100644 --- a/doc/source/file_formats.rst +++ b/doc/source/file_formats.rst @@ -1,8 +1,8 @@ File formats ============ VerMoUTH introduces two new file formats. The ``.ff`` format for defining -:ref:`blocks `, :ref:`links ` and : -ref:`modifications `. Note that you can also define blocks +:ref:`blocks `, :ref:`links ` and +:ref:`modifications `. Note that you can also define blocks (and basic links) with Gromacs ``.itp`` and ``.rtp`` files. The ``.mapping`` format can be used to define :ref:`mappings `. Mappings that don't cross residue boundaries can also be defined using ``.map`` files. diff --git a/doc/source/general_overview.rst b/doc/source/general_overview.rst index a0bb7456c..c67dcb32d 100644 --- a/doc/source/general_overview.rst +++ b/doc/source/general_overview.rst @@ -26,18 +26,14 @@ pip. pip install vermouth -The behavior of the ``pip`` command can vary depending on the specificity of your +The behavior of the ``pip`` command can vary depending on the specifics of your python installation. See the `documentation on installing a python package `_ to learn more. -Vermouth has `SciPy `_ as *optional* dependency. If available -it will be used to accelerate the distance calculations when `making bonds -`_ - Quickstart ---------- -The CLI of martinize2 is very similar to that of martinize1, and can often be +The CLI of martinize2 is very similar to that of [martinize1]_, and can often be used as a drop-in replacement. For example: .. code-block:: bash @@ -45,7 +41,7 @@ used as a drop-in replacement. For example: martinize2 -f lysozyme.pdb -x cg_protein.pdb -o topol.top -ff martini3001 -dssp -elastic -This will read an atomistic ``lysozyme.pdb`` and produce a Martini3_ compatible +This will read an atomistic ``lysozyme.pdb`` and produce a [Martini3]_ compatible structure and topology at ``cg_protein.pdb`` and ``topol.top`` respectively. It will use the program [DSSP]_ to determine the proteins secondary structure (which influences the topology), and produce an elastic network. See ``martinize2 -h`` @@ -89,7 +85,8 @@ Kroon, P.C. (2020). Martinize 2 -- VerMoUTH. *Aggregate, automate, assemble* (pp References ---------- -.. [Martini3] P.C.T. Souza, R. Alessandri, J. Barnoud, S. Thallmair, I. Faustino, F. Grünewald, et al., Martini 3: a general purpose force field for coarse-grained molecular dynamics, Nat. Methods. 18 (2021) 382–388. doi:10.1038/s41592-021-01098-3. -.. [VMD] W. Humphrey, A. Dalke and K. Schulten, "VMD - Visual Molecular Dynamics", J. Molec. Graphics, 1996, vol. 14, pp. 33-38. http://www.ks.uiuc.edu/Research/vmd/. -.. [DSSP] - W.G. Touw, C. Baakman, J. Black, T.A.H. te Beek, E. Krieger, R.P. Joosten, et al., A series of PDB-related databanks for everyday needs, Nucleic Acids Res. 43 (2015) D364–D368. doi:10.1093/nar/gku1028. - - W. Kabsch, C. Sander, Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features., Biopolymers. 22 (1983) 2577–637. doi:10.1002/bip.360221211. \ No newline at end of file +.. [Martini3] P.C.T. Souza, R. Alessandri, J. Barnoud, S. Thallmair, I. Faustino, F. Grünewald, et al., Martini 3: a general purpose force field for coarse-grained molecular dynamics, Nat. Methods. 18 (2021) 382–388. https://doi.org/10.1038/s41592-021-01098-3 +.. [martinize1] de Jong, D. H., Singh, G., Bennett, W. F. D., Arnarez, C., Wassenaar, T. a, Schäfer, L. v., Periole, X., Tieleman, D. P., & Marrink, S. J. (2013). Improved Parameters for the Martini Coarse-Grained Protein Force Field. Journal of Chemical Theory and Computation, 9(1), 687–697. https://doi.org/10.1021/ct300646g +.. [VMD] W. Humphrey, A. Dalke and K. Schulten, "VMD - Visual Molecular Dynamics", J. Molec. Graphics, 1996, vol. 14, pp. 33-38. http://www.ks.uiuc.edu/Research/vmd/ +.. [DSSP] - W.G. Touw, C. Baakman, J. Black, T.A.H. te Beek, E. Krieger, R.P. Joosten, et al., A series of PDB-related databanks for everyday needs, Nucleic Acids Res. 43 (2015) D364–D368. https://doi.org/10.1093/nar/gku1028 + - W. Kabsch, C. Sander, Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features., Biopolymers. 22 (1983) 2577–637. https://doi.org/10.1002/bip.360221211 \ No newline at end of file diff --git a/doc/source/graph_algorithms.rst b/doc/source/graph_algorithms.rst index 6411d2f5c..6c8e7761d 100644 --- a/doc/source/graph_algorithms.rst +++ b/doc/source/graph_algorithms.rst @@ -47,7 +47,7 @@ the equivalence criteria. Subgraph isomorphism ++++++++++++++++++++ A subgraph isomorphism is a :ref:`graph_algorithms:graph isomorphism`, but -without the constraint that :math:`|H| = |G|`. Instead, :math:`|H| <= |G|` if +without the constraint that :math:`|H| = |G|`. Instead, :math:`|H| \le |G|` if :math:`H` is subgraph isomorphic to :math:`G`. Induced subgraph isomorphism diff --git a/doc/source/martinize2_workflow.rst b/doc/source/martinize2_workflow.rst index 418b74547..f9655f307 100644 --- a/doc/source/martinize2_workflow.rst +++ b/doc/source/martinize2_workflow.rst @@ -46,7 +46,7 @@ We take into account the following PDB records: ``MODEL`` and ``ENDMDL`` to determine which model to parse; ``ATOM`` and ``HETATM``; ``TER``, which can be used to separate molecules; ``CONECT``, which is used to add edges; and ``END``. -Will issue a ``pdb-alternate`` warning if any atoms in the PDB file have an +We issue a ``pdb-alternate`` warning if any atoms in the PDB file have an alternate conformation that is not 'A', since those will always be ignored. Relevant CLI options: ``-f``; ``-model``; ``-ignore``; ``-ignh``. @@ -67,28 +67,28 @@ same name, nor when there is no :ref:`data:Block` corresponding to the residue [#]_. Note that this will only ever create edges *within* residues. Edges will be added based on distance when they are close enough together, -except for a few exceptions (below). Atoms will be considered close enough based +except for a few exceptions (see below). Atoms will be considered close enough based on their element (taken from either the PDB file directly, or deduced from atom name [#]_). The distance threshold is multiplied by ``-bonds-fudge`` to allow for conformations that are slightly out-of-equilibrium. Edges will not be added from distances in two cases: 1) if edges could be added based on atom names no edges will be added between atoms that are not bonded in the reference -:ref:`data:Block`. 2) No edges will be added between residues if one of the -atoms involved is a hydrogen atom. Edges added this way are logged as debug -output. +:ref:`data:Block`. 2) If the edge would connect 2 residues, and at least one of +the atoms involved is a hydrogen atom. Edges added based on distance are logged +as debug output. If your input structure is far from equilibrium and adding edges based on distance is likely to produce erroneous results, make sure to provide ``CONECT`` records describing at least the edges between residues, and between atoms involved in modifications, such as termini and PTMs. -Will issue a ``general`` warning when it is requested to add edges based on atom +We issue a ``general`` warning when it is requested to add edges based on atom names, but this cannot be done for some reason. This commonly happens when your input structure is a homo multimer without ``TER`` record and identical residue numbers and chain identifiers across the monomers. In this case martinize2 cannot distinguish the atom "N", residue ALA1, chain "A" from the atom "N", -residue ALA1, chain "A" in the next monomer. The easiest solution is to place -strategic ``TER`` records in your PDB file. +residue ALA1, chain "A" in the next monomer. The easiest solution in this case +is to place strategic ``TER`` records in your PDB file. Relevant CLI options: ``-bond-from``; ``-bonds-fudge`` @@ -96,7 +96,7 @@ Relevant CLI options: ``-bond-from``; ``-bonds-fudge`` .. [#] The method for deriving the element from an atom name is extremely simplistic: the first letter is used. This will go wrong for two-letter elements such as 'Fe', 'Cl', and 'Cu'. In those cases, make sure your PDB - file specified the correct element. See also: + file specifies the correct element. See also: :func:`~vermouth.graph_utils.add_element_attr` Annotate mutations and modifications @@ -110,8 +110,8 @@ PTMs and termini. This is done in part by The ``-mutate`` option can be used to change the residue name of one or more residues. For example, you can specify ``-mutate PHE42:ALA`` to mutate all residues with residue name "PHE" and residue number 42 to "ALA". Or change all -"HSE" residues to "HIS": ``-mutate HSE:HIS``. Mutations can be specified in a -similar way. +"HSE" residues to "HIS": ``-mutate HSE:HIS``. Modifications can be specified in +a similar way. The specifications ``nter`` and ``cter`` can be used to quickly refer to all N- and C-terminal residues respectively [#]_. In addition, the CLI options @@ -127,8 +127,9 @@ Relevant CLI options: ``-mutate``, ``-modify``, ``-nter``, ``-cter``, ``-nt`` .. [#] N- and C-termini are defined as residues with 1 neighbour and having a higher or lower residue number than the neighbour, respectively. Note that - this does not include zwitterionic amino acids! - This also means that if your protein has a chain break you'll end up with + this definition also includes termini for non-proteins, but it does not + include zwitterionic amino acids! + This also means that if your polymer has a chain break you'll end up with more termini than you would otherwise expect. 2) Repair the input graph @@ -140,7 +141,7 @@ modifications such as PTMs. Repair graph ------------ The first step is to complete the graph so that it contains all atoms described -by the reference :ref:`data:Block`, and that all atoms have the correct names. +by the reference :ref:`data:Block`, and so that all atoms have the correct names. These blocks are taken from the input force field based on residue names (taking any mutations and modifications into account). :class:`~vermouth.processors.repair_graph.RepairGraph` takes care of all this. @@ -161,18 +162,19 @@ found. This sorting also speeds up the calculation significantly, so if you're working with a system containing large residues consider correcting some of the atom names. -Will issue an ``unknown-residue`` warning if no Block can be retrieved for a +We issue an ``unknown-residue`` warning if no :ref:`data:Block` can be retrieved for a given residue name. In this case the entire molecule will be removed from the system. Identify modifications ---------------------- -Secondly, all modifications are identified. `Repair graph`_ will also tag all +Secondly, all modifications are identified. `Repair graph`_ also tags all atoms it did not recognise, and those are processed by :class:`~vermouth.processors.canonicalize_modifications.CanonicalizeModifications`. -This is done by finding the solution where all unknown atoms are covered by the -atoms of exactly one :ref:`data:Modification`, where the modification must be an +Modifications are identified by finding the solution where all tagged atoms are +covered by the atoms of exactly one :ref:`data:Modification`, where the +modification must be an :ref:`induced subgraph ` of the molecule. Every modification must contain at least one "anchoring" atom, which is an atom that is also described by a :ref:`data:Block`. Unknown atoms are @@ -182,7 +184,7 @@ equal if their atom name is equal. Because modifications must be input structure there can be no missing atoms! After this step all atoms will have correct atom names, and any residues that -are include modifications will be labelled. This information is later used +include modifications will be labelled. This information is later used during the :ref:`resolution transformation ` An ``unknown-input`` warning will be issued if a modification cannot be @@ -199,7 +201,7 @@ The resolution transformation is done by your molecules at the target resolution, based on the available mappings. These mappings are read from the ``.map`` and ``.mapping`` files available in the library [#]_. See also :ref:`file_formats:File formats`. In essence these -mappings describe how molecular fragments (atoms and bonds) correspond to a +mappings describe how molecular fragments (nodes and edges) correspond to a block in the target force field. We find all the ways these mappings can fit onto the input molecule, and add the corresponding blocks and modifications to the resulting molecule. @@ -217,7 +219,7 @@ particles they map to in the output force field will also be connected. Interactions across separate blocks will be added in the next step. The processor will do some sanity checking on the resulting molecule, and issue -an ``unmapped-atom`` warning if there are modifications in the input molecule +an ``unmapped-atom`` warning if there are atoms in the input molecule for which no mapping can be found. In addition, this warning will also be issued if there are any non-hydrogen atoms that are not mapped to the output molecule. A more serious ``inconsistent-data`` warning will be issued for the following diff --git a/doc/source/processors.rst b/doc/source/processors.rst index 3ca9a69fc..e53c0425a 100644 --- a/doc/source/processors.rst +++ b/doc/source/processors.rst @@ -1,2 +1,18 @@ Processor ========= +:class:`Processors ` are relatively +simple. They form the fundamental steps of the martinize2 pipeline. Processors +are called via their :meth:`~vermouth.processors.processor.Processor.run_system` +method. The default implementation of this method iterates over the molecules +in the system, and runs the :meth:`~vermouth.processors.processor.Processor.run_molecule` +method on them. This means that implementations of Processors must implement +either a ``run_system`` method, or a ``run_molecule`` method. If the processor +can be run on independent molecules the ``run_molecule`` method is preferred; +``run_system`` should be used only for cases where the problem at hand cannot +be separated in tasks-per-molecule. + +In their ``run_molecule`` method Processor implementations are free to either +modify :class:`molecules ` or create new ones. +Either way, they must return a :class:`~vermouth.molecule.Molecule`. The +``run_system`` will be called with a :class:`~vermouth.system.System`, which +will be modified in place. diff --git a/doc/source/tutorials/1_simple_protein_aa/index.rst b/doc/source/tutorials/1_simple_protein_aa/index.rst deleted file mode 100644 index a3c9cf81c..000000000 --- a/doc/source/tutorials/1_simple_protein_aa/index.rst +++ /dev/null @@ -1,2 +0,0 @@ -Atomistic protein in solution -============================= diff --git a/doc/source/tutorials/2_simple_protein_cg/index.rst b/doc/source/tutorials/2_simple_protein_cg/index.rst deleted file mode 100644 index e55b6f180..000000000 --- a/doc/source/tutorials/2_simple_protein_cg/index.rst +++ /dev/null @@ -1,2 +0,0 @@ -Coarse-grained protein in solution -================================== diff --git a/doc/source/tutorials/3_membrane_protein/index.rst b/doc/source/tutorials/3_membrane_protein/index.rst deleted file mode 100644 index 6913597cf..000000000 --- a/doc/source/tutorials/3_membrane_protein/index.rst +++ /dev/null @@ -1,3 +0,0 @@ -Transmembrane protein -===================== -With cholesterol in the membrane \ No newline at end of file diff --git a/doc/source/tutorials/4_branched_polymer/index.rst b/doc/source/tutorials/4_branched_polymer/index.rst deleted file mode 100644 index 401cb981b..000000000 --- a/doc/source/tutorials/4_branched_polymer/index.rst +++ /dev/null @@ -1,2 +0,0 @@ -PAMAM: a hyperbranched polymer -============================== diff --git a/doc/source/tutorials/5_glycosylated_protein/index.rst b/doc/source/tutorials/5_glycosylated_protein/index.rst deleted file mode 100644 index 4de05d7d4..000000000 --- a/doc/source/tutorials/5_glycosylated_protein/index.rst +++ /dev/null @@ -1,2 +0,0 @@ -A glycosylated protein -====================== \ No newline at end of file diff --git a/doc/source/tutorials/6_adding_residues_links/files/ala-sep-ala.pdb b/doc/source/tutorials/6_adding_residues_links/files/ala-sep-ala.pdb new file mode 100644 index 000000000..74aedbbae --- /dev/null +++ b/doc/source/tutorials/6_adding_residues_links/files/ala-sep-ala.pdb @@ -0,0 +1,76 @@ +ATOM 1 N ALA 2 5.892 6.590 0.772 1.00 0.00 N1+ +ATOM 2 CA ALA 2 6.787 5.479 0.424 1.00 0.00 C +ATOM 3 C ALA 2 7.358 5.632 -0.971 1.00 0.00 C +ATOM 4 O ALA 2 6.678 6.097 -1.907 1.00 0.00 O +ATOM 5 CB ALA 2 6.008 4.158 0.573 1.00 0.00 C +ATOM 6 HA ALA 2 7.626 5.500 1.145 1.00 0.00 H +ATOM 7 1HB ALA 2 5.609 4.032 1.598 1.00 0.00 H +ATOM 8 2HB ALA 2 5.143 4.107 -0.117 1.00 0.00 H +ATOM 9 3HB ALA 2 6.644 3.281 0.368 1.00 0.00 H +ATOM 10 H01 ALA 2 5.554 6.479 1.717 1.00 0.00 H +ATOM 11 H02 ALA 2 6.398 7.464 0.706 1.00 0.00 H +ATOM 12 H03 ALA 2 5.108 6.609 0.131 1.00 0.00 H +ATOM 13 N SER 3 8.566 5.235 -1.183 1.00 0.00 N +ATOM 14 CA SER 3 9.087 4.998 -2.535 1.00 0.00 C +ATOM 15 C SER 3 10.146 3.913 -2.485 1.00 0.00 C +ATOM 16 O SER 3 11.339 4.171 -2.285 1.00 0.00 O +ATOM 17 CB SER 3 9.611 6.290 -3.232 1.00 0.00 C +ATOM 18 OG SER 3 10.708 6.960 -2.686 1.00 0.00 O +ATOM 19 P SER 3 10.949 7.716 -1.502 1.00 0.00 P +ATOM 20 O1 SER 3 11.988 7.174 -0.832 1.00 0.00 O +ATOM 21 O2 SER 3 9.680 8.015 -0.948 1.00 0.00 O +ATOM 22 O3 SER 3 11.233 9.084 -1.836 1.00 0.00 O1- +ATOM 23 H SER 3 9.168 5.042 -0.389 1.00 0.00 H +ATOM 24 HA SER 3 8.260 4.620 -3.166 1.00 0.00 H +ATOM 25 HB1 SER 3 8.790 7.006 -3.164 1.00 0.00 H +ATOM 26 HB2 SER 3 9.894 6.000 -4.252 1.00 0.00 H +ATOM 27 N ALA 4 9.773 2.677 -2.627 1.00 0.00 N +ATOM 28 CA ALA 4 10.719 1.563 -2.713 1.00 0.00 C +ATOM 29 C ALA 4 10.123 0.422 -3.507 1.00 0.00 C +ATOM 30 O ALA 4 10.604 0.050 -4.582 1.00 0.00 O +ATOM 31 CB ALA 4 11.101 1.150 -1.277 1.00 0.00 C +ATOM 32 O01 ALA 4 8.965 -0.245 -2.997 1.00 0.00 O1- +ATOM 33 H ALA 4 8.784 2.472 -2.691 1.00 0.00 H +ATOM 34 HA ALA 4 11.626 1.879 -3.258 1.00 0.00 H +ATOM 35 1HB ALA 4 11.557 1.994 -0.721 1.00 0.00 H +ATOM 36 2HB ALA 4 10.222 0.818 -0.694 1.00 0.00 H +ATOM 37 3HB ALA 4 11.835 0.329 -1.266 1.00 0.00 H +TER +CONECT 1 2 10 11 12 +CONECT 2 1 3 5 6 +CONECT 3 2 4 13 +CONECT 4 3 +CONECT 5 2 7 8 9 +CONECT 6 2 +CONECT 7 5 +CONECT 8 5 +CONECT 9 5 +CONECT 10 1 +CONECT 11 1 +CONECT 12 1 +CONECT 13 3 14 23 +CONECT 14 13 15 17 24 +CONECT 15 14 16 27 +CONECT 16 15 +CONECT 17 14 18 25 26 +CONECT 18 17 19 +CONECT 19 18 20 21 22 +CONECT 20 19 +CONECT 21 19 +CONECT 22 19 +CONECT 23 13 +CONECT 24 14 +CONECT 25 17 +CONECT 26 17 +CONECT 27 15 28 33 +CONECT 28 27 29 31 34 +CONECT 29 28 30 32 +CONECT 30 29 +CONECT 31 28 35 36 37 +CONECT 32 29 +CONECT 33 27 +CONECT 34 28 +CONECT 35 31 +CONECT 36 31 +CONECT 37 31 +END diff --git a/doc/source/tutorials/6_adding_residues_links/files/force_fields/charmm/sep.ff b/doc/source/tutorials/6_adding_residues_links/files/force_fields/charmm/sep.ff new file mode 100644 index 000000000..f367ae290 --- /dev/null +++ b/doc/source/tutorials/6_adding_residues_links/files/force_fields/charmm/sep.ff @@ -0,0 +1,33 @@ +[ moleculetype ] +SEP 3 + +[ atoms ] + 1 N 1 SEP N 1 0 + 2 H 1 SEP HN 2 0 + 3 C 1 SEP CA 3 0 + 4 H 1 SEP HA 4 0 + 5 C 1 SEP CB 5 0 + 6 H 1 SEP HB1 6 0 + 7 H 1 SEP HB2 7 0 + 8 O 1 SEP OG 8 0 + 9 C 1 SEP C 9 0 +10 O 1 SEP O 10 0 +11 P 1 SEP P 11 0 +12 O 1 SEP O1 12 0 +13 O 1 SEP O2 13 0 +14 O 1 SEP O3 14 -1 + +[ bonds ] + 3 5 + 5 8 + 1 2 + 1 3 + 3 9 + 3 4 + 5 6 + 5 7 + 9 10 + 8 11 +11 12 +11 13 +11 14 diff --git a/doc/source/tutorials/6_adding_residues_links/files/force_fields/charmm/sep.rtp b/doc/source/tutorials/6_adding_residues_links/files/force_fields/charmm/sep.rtp new file mode 100644 index 000000000..1c4aa2dfc --- /dev/null +++ b/doc/source/tutorials/6_adding_residues_links/files/force_fields/charmm/sep.rtp @@ -0,0 +1,56 @@ +[ bondedtypes ] +; Column 1 : default bondtype +; Column 2 : default angletype +; Column 3 : default proper dihedraltype +; Column 4 : default improper dihedraltype +; Column 5 : This controls the generation of dihedrals from the bonding. +; All possible dihedrals are generated automatically. A value of +; 1 here means that all these are retained. A value of +; 0 here requires generated dihedrals be removed if +; * there are any dihedrals on the same central atoms +; specified in the residue topology, or +; * there are other identical generated dihedrals +; sharing the same central atoms, or +; * there are other generated dihedrals sharing the +; same central bond that have fewer hydrogen atoms +; Column 6 : number of neighbors to exclude from non-bonded interactions +; Column 7 : 1 = generate 1,4 interactions between pairs of hydrogen atoms +; 0 = do not generate such +; Column 8 : 1 = remove proper dihedrals if found centered on the same +; bond as an improper dihedral +; 0 = do not generate such +; bondtype angletype dihedraltype impropertype all_dih nrexcl HH14 bRemoveDih + 1 5 9 2 1 3 1 0 + +[ SEP ] + [ atoms ] + N N 0 0 + HN H 0 1 + CA C 0 2 + HA H 0 3 + CB C 0 4 + HB1 H 0 5 + HB2 H 0 6 + OG O 0 7 + C C 0 8 + O O 0 9 + P P 0 10 + O1 O 0 11 + O2 O 0 12 + O3 O -1 13 + [ bonds ] + CB CA + OG CB + N HN + N CA + C CA + C +N + CA HA + CB HB1 + CB HB2 + O C + OG P + P O1 + P O2 + P O3 + diff --git a/doc/source/tutorials/6_adding_residues_links/files/force_fields/martini3001/sep.ff b/doc/source/tutorials/6_adding_residues_links/files/force_fields/martini3001/sep.ff new file mode 100644 index 000000000..86b5bb792 --- /dev/null +++ b/doc/source/tutorials/6_adding_residues_links/files/force_fields/martini3001/sep.ff @@ -0,0 +1,35 @@ +;[ macros ] +;protein_resnames "GLY|ALA|CYS|VAL|LEU|ILE|MET|PRO|HYP|ASN|GLN|ASP|ASPP|ASH|GLU|GLUP|GLH|THR|SER|LYS|LSN|LYN|ARG|HSE|HIS|HSD|HSP|HID|HIP|HIE|PHE|TYR|TRP|SEP" +;protein_resnames_non_pro "GLY|ALA|CYS|VAL|LEU|ILE|MET|ASN|GLN|ASP|ASPP|ASH|GLU|GLUP|GLH|THR|SER|LYS|LSN|LYN|ARG|HSE|HIS|HSD|HSP|HID|HIP|HIE|PHE|TYR|TRP|SEP" +;prot_default_bb_type P2 +;stiff_fc 1000000 + +;;; PHOSPHOSERINE +[ moleculetype ] +SEP 1 + +[ warning ] +THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. + +[ atoms ] +; id type resnr residue atom cgnr charge + 1 P2 1 SEP BB 1 0 + 2 Q5n 1 SEP SC1 1 -1 + +[ bonds ] +BB SC1 1 0.33 5000 + +[ link ] +[ warning ] +THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. +[ atoms ] +-BB {"resname": "ALA"} +BB {"resname": "SEP"} +SC1 {"resname": "SEP"} ++BB {"resname": "ALA"} +[ bonds ] +BB +BB 1 0.35 4000 +BB -BB 1 0.35 4000 +[ angles ] +-BB BB +BB 10 100 20 +-BB BB SC1 2 100 25 diff --git a/doc/source/tutorials/6_adding_residues_links/files/mappings/sep.charmm36.map b/doc/source/tutorials/6_adding_residues_links/files/mappings/sep.charmm36.map new file mode 100644 index 000000000..29871ec4b --- /dev/null +++ b/doc/source/tutorials/6_adding_residues_links/files/mappings/sep.charmm36.map @@ -0,0 +1,30 @@ +[ molecule ] +SEP + +[ from ] +charmm + +[ to ] +martini3001 + +[ martini ] +BB SC1 + +[ mapping ] +charmm + +[ atoms ] + 1 N BB + 2 HN BB + 3 CA BB + 4 HA !BB + 5 CB BB SC1 + 6 HB1 !SC1 + 7 HB2 !SC1 + 8 OG SC1 + 9 C BB +10 O BB +11 P SC1 +12 O1 SC1 +13 O2 SC1 +14 O3 SC1 diff --git a/doc/source/tutorials/6_adding_residues_links/index.rst b/doc/source/tutorials/6_adding_residues_links/index.rst index ac8802b71..effb5b135 100644 --- a/doc/source/tutorials/6_adding_residues_links/index.rst +++ b/doc/source/tutorials/6_adding_residues_links/index.rst @@ -1,2 +1,272 @@ Adding new residues and links ============================= +Occasionally you may need a topology containing a residue that is not yet +described by the force fields that ship with vermouth. In this case you will +need to create the required data files yourself, and point martinize2 to them +with the `-ff-dir` and `-map-dir` flags. The key thing to remember is that you +will need to add/edit *three* files. You need to describe your new residue in +the input force field (default charmm); the output force field, and the mapping +between the two. + +For this example we will add the required data files for a phosphorylated +serine residue (``OC(=O)C(N)COP(=O)(=O)[O-]``). Note that the parameters +presented here are for demonstration purposes only and not fit for actual +science or simulations! + +All input files for this tutorial can be found on `github `_. + +The input force field +--------------------- +The input force field is the force field best describing the structure and atom +names in your input PDB file. By default we use the charmm naming scheme. Since +the input force field will only be used to :ref:`repair ` +your input structure, only the atom names and edges are relevant. + +We'll start by creating a force fields folder we can use to create the tutorial +files; and in that folder we need to create a force field named ``charmm``:: + + mkdir -p force_fields/charmm + +Now we need to add the SEP :ref:`data:block` to our ``charmm`` folder. Let's +put it in the file ``force_fields/charmm/sep.ff``:: + + [ moleculetype ] + SEP 3 + + [ atoms ] + 1 N 1 SEP N 1 0 + 2 H 1 SEP HN 2 0 + 3 C 1 SEP CA 3 0 + 4 H 1 SEP HA 4 0 + 5 C 1 SEP CB 5 0 + 6 H 1 SEP HB1 6 0 + 7 H 1 SEP HB2 7 0 + 8 O 1 SEP OG 8 0 + 9 C 1 SEP C 9 0 + 10 O 1 SEP O 10 0 + 11 P 1 SEP P 11 0 + 12 O 1 SEP O1 12 0 + 13 O 1 SEP O2 13 0 + 14 O 1 SEP O3 14 -1 + + [ bonds ] + 3 5 + 5 8 + 1 2 + 1 3 + 3 9 + 3 4 + 5 6 + 5 7 + 9 10 + 8 11 + 11 12 + 11 13 + 11 14 + +This file looks a lot like an ITP file, and if you have one of those you can +simply drop it in and use it as is. In this case we didn't have an ITP file +for SEP yet, so we had to make one. Since we only need atom names and bonds +that's all we provide. Note that we added all hydrogens. Finally, if you +prefer, you could also provide a RTP file instead of ITP. + +The output force field +---------------------- +We also need to add the SEP :ref:`data:block` to the output force field. Of +course we'll use Martini 3 for this. Let's again start by making a martini3001 +folder:: + + mkdir -p force_fields/martini3001 + +Now to add the block to ``force_fields/martini3001/sep.ff``:: + + ;;; PHOSPHOSERINE + [ moleculetype ] + SEP 1 + + ; THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. + [ atoms ] + ; id type resnr residue atom cgnr charge + 1 P2 1 SEP BB 1 0 + 2 Q5n 1 SEP SC1 1 -1 + + [ bonds ] + BB SC1 1 0.33 5000 + +At this point we can run ``martinize2 -ff-dir force_fields -list-blocks`` to +check whether our new SEP blocks are picked up. + +The mapping +----------- +Finally, we need to add the mapping describing how to get from charmm to +martini3001. We need to make a folder:: + + mkdir mappings + +In that folder, make a file ``mappings/sep.charmm36.map``:: + + [ molecule ] + SEP + + [ from ] + charmm + + [ to ] + martini3001 + + [ martini ] + BB SC1 + + [ mapping ] + charmm + + [ atoms ] + 1 N BB + 2 HN BB + 3 CA BB + 4 HA !BB + 5 CB BB SC1 + 6 HB1 !SC1 + 7 HB2 !SC1 + 8 OG SC1 + 9 C BB + 10 O BB + 11 P SC1 + 12 O1 SC1 + 13 O2 SC1 + 14 O3 SC1 + +A few things are worth noting here. The HA, HB1, and HB2 atoms are mentioned +here, but their mapping weight is 0, due to the exclamation point. In addition, +CB will contribute to BB and SC1 with equal weight. + +Ok, this great! At this point we can run ``martinize2``:: + + martinize2 -ff-dir force_fields -map-dir mappings -f ala-sep-ala.pdb -x AJA.pdb -o topol.top + +And inspect the resulting ``molecule_0.itp`` to make sure our final topology is +correct:: + + [ moleculetype ] + molecule_0 1 + + [ atoms ] + 1 Q5 1 ALA BB 1 1 + 2 TC3 1 ALA SC1 2 0.0 + 3 P2 2 SEP BB 3 0.0 + 4 Q5n 2 SEP SC1 3 -1.0 + 5 Q5 3 ALA BB 4 -1 + 6 TC3 3 ALA SC1 5 0.0 + + [ bonds ] + 3 4 1 0.33 5000 + + #ifdef FLEXIBLE + ; Side chain bonds + 1 2 1 0.270 1000000 + 5 6 1 0.270 1000000 + #endif + + [ constraints ] + #ifndef FLEXIBLE + ; Side chain bonds + 1 2 1 0.270 + 5 6 1 0.270 + #endif + +We can see that we end up with the correct non-bonded parameters for our SEP +residue, the C- and N-termini are looking good, and we have the BB-SC1 bond we +specified. + +There is a problem though, there are no bonds (or constraints) connecting the +SEP residue to its neighbouring ALA residues! + +The Links +--------- +In Vermouth and martinize2 we use :ref:`links ` to describe interactions +between residues. We need to these to the output force field---in this case +martini3001. + +We can add the following to ``force_fields/martini3001/sep.ff``:: + + [ link ] + [ bonds ] + BB {"resname": "SEP"} +BB {"resname": "ALA"} 1 0.35 4000 + + [ link ] + [ bonds ] + BB {"resname": "SEP"} -BB {"resname": "ALA"} 1 0.35 4000 + + [ link ] + [ angles ] + -BB {"resname": "ALA"} BB {"resname": "SEP"} +BB {"resname": "ALA"} 10 100 20 + + [ link ] + [ angles ] + -BB BB {"resname": "SEP"} SC1 2 100 25 + +Links are small molecular fragments. For example, the first one consists of 2 +BB beads. The first one has to be part of a SEP residue, and the second has to +be part of an ALA residue. In addition, the ``+`` means the second BB has to +have a resid of exactly one higher than the first BB. In our example, this link +will apply a backbone bond between the SEP residue and ALA3. + +The second link is almost identical, and applies a backbone bond between ALA1 +and SEP. The two angles work in a similar fashion. + +This would result in the following topology:: + + [ moleculetype ] + molecule_0 1 + + [ atoms ] + 1 Q5 1 ALA BB 1 1 + 2 TC3 1 ALA SC1 2 0.0 + 3 P2 2 SEP BB 3 0.0 + 4 Q5n 2 SEP SC1 3 -1.0 + 5 Q5 3 ALA BB 4 -1 + 6 TC3 3 ALA SC1 5 0.0 + + [ bonds ] + 3 4 1 0.33 5000 + 3 5 1 0.35 4000 + 3 1 1 0.35 4000 + + #ifdef FLEXIBLE + ; Side chain bonds + 1 2 1 0.270 1000000 + 5 6 1 0.270 1000000 + #endif + + [ constraints ] + #ifndef FLEXIBLE + ; Side chain bonds + 1 2 1 0.270 + 5 6 1 0.270 + #endif + + [ angles ] + 1 3 5 10 100 20 + 1 3 4 2 100 25 + +We now have bonds between the backbone beads, as well as the 2 angles we need. +In this case, since we don't intend to use this residue for anything other than +an ALA-SEP-ALA peptide, we can combine these links:: + + [ link ] + [ atoms ] + -BB {"resname": "ALA"} + BB {"resname": "SEP"} + SC1 {"resname": "SEP"} + +BB {"resname": "ALA"} + [ bonds ] + BB +BB 1 0.35 4000 + BB -BB 1 0.35 4000 + [ angles ] + -BB BB +BB 10 100 20 + -BB BB SC1 2 100 25 + +Which will produce the exact same topology. +If you *do* need to add a residue that can be used in any kind of protein +please take a look at how the Martini 3 force field is implemented, and deals +with e.g. the secondary structure dependence. diff --git a/doc/source/tutorials/7_adding_modifications/files/ala-sep-ala.pdb b/doc/source/tutorials/7_adding_modifications/files/ala-sep-ala.pdb new file mode 100644 index 000000000..74aedbbae --- /dev/null +++ b/doc/source/tutorials/7_adding_modifications/files/ala-sep-ala.pdb @@ -0,0 +1,76 @@ +ATOM 1 N ALA 2 5.892 6.590 0.772 1.00 0.00 N1+ +ATOM 2 CA ALA 2 6.787 5.479 0.424 1.00 0.00 C +ATOM 3 C ALA 2 7.358 5.632 -0.971 1.00 0.00 C +ATOM 4 O ALA 2 6.678 6.097 -1.907 1.00 0.00 O +ATOM 5 CB ALA 2 6.008 4.158 0.573 1.00 0.00 C +ATOM 6 HA ALA 2 7.626 5.500 1.145 1.00 0.00 H +ATOM 7 1HB ALA 2 5.609 4.032 1.598 1.00 0.00 H +ATOM 8 2HB ALA 2 5.143 4.107 -0.117 1.00 0.00 H +ATOM 9 3HB ALA 2 6.644 3.281 0.368 1.00 0.00 H +ATOM 10 H01 ALA 2 5.554 6.479 1.717 1.00 0.00 H +ATOM 11 H02 ALA 2 6.398 7.464 0.706 1.00 0.00 H +ATOM 12 H03 ALA 2 5.108 6.609 0.131 1.00 0.00 H +ATOM 13 N SER 3 8.566 5.235 -1.183 1.00 0.00 N +ATOM 14 CA SER 3 9.087 4.998 -2.535 1.00 0.00 C +ATOM 15 C SER 3 10.146 3.913 -2.485 1.00 0.00 C +ATOM 16 O SER 3 11.339 4.171 -2.285 1.00 0.00 O +ATOM 17 CB SER 3 9.611 6.290 -3.232 1.00 0.00 C +ATOM 18 OG SER 3 10.708 6.960 -2.686 1.00 0.00 O +ATOM 19 P SER 3 10.949 7.716 -1.502 1.00 0.00 P +ATOM 20 O1 SER 3 11.988 7.174 -0.832 1.00 0.00 O +ATOM 21 O2 SER 3 9.680 8.015 -0.948 1.00 0.00 O +ATOM 22 O3 SER 3 11.233 9.084 -1.836 1.00 0.00 O1- +ATOM 23 H SER 3 9.168 5.042 -0.389 1.00 0.00 H +ATOM 24 HA SER 3 8.260 4.620 -3.166 1.00 0.00 H +ATOM 25 HB1 SER 3 8.790 7.006 -3.164 1.00 0.00 H +ATOM 26 HB2 SER 3 9.894 6.000 -4.252 1.00 0.00 H +ATOM 27 N ALA 4 9.773 2.677 -2.627 1.00 0.00 N +ATOM 28 CA ALA 4 10.719 1.563 -2.713 1.00 0.00 C +ATOM 29 C ALA 4 10.123 0.422 -3.507 1.00 0.00 C +ATOM 30 O ALA 4 10.604 0.050 -4.582 1.00 0.00 O +ATOM 31 CB ALA 4 11.101 1.150 -1.277 1.00 0.00 C +ATOM 32 O01 ALA 4 8.965 -0.245 -2.997 1.00 0.00 O1- +ATOM 33 H ALA 4 8.784 2.472 -2.691 1.00 0.00 H +ATOM 34 HA ALA 4 11.626 1.879 -3.258 1.00 0.00 H +ATOM 35 1HB ALA 4 11.557 1.994 -0.721 1.00 0.00 H +ATOM 36 2HB ALA 4 10.222 0.818 -0.694 1.00 0.00 H +ATOM 37 3HB ALA 4 11.835 0.329 -1.266 1.00 0.00 H +TER +CONECT 1 2 10 11 12 +CONECT 2 1 3 5 6 +CONECT 3 2 4 13 +CONECT 4 3 +CONECT 5 2 7 8 9 +CONECT 6 2 +CONECT 7 5 +CONECT 8 5 +CONECT 9 5 +CONECT 10 1 +CONECT 11 1 +CONECT 12 1 +CONECT 13 3 14 23 +CONECT 14 13 15 17 24 +CONECT 15 14 16 27 +CONECT 16 15 +CONECT 17 14 18 25 26 +CONECT 18 17 19 +CONECT 19 18 20 21 22 +CONECT 20 19 +CONECT 21 19 +CONECT 22 19 +CONECT 23 13 +CONECT 24 14 +CONECT 25 17 +CONECT 26 17 +CONECT 27 15 28 33 +CONECT 28 27 29 31 34 +CONECT 29 28 30 32 +CONECT 30 29 +CONECT 31 28 35 36 37 +CONECT 32 29 +CONECT 33 27 +CONECT 34 28 +CONECT 35 31 +CONECT 36 31 +CONECT 37 31 +END diff --git a/doc/source/tutorials/7_adding_modifications/files/force_fields/charmm/mods.ff b/doc/source/tutorials/7_adding_modifications/files/force_fields/charmm/mods.ff new file mode 100644 index 000000000..f55e89867 --- /dev/null +++ b/doc/source/tutorials/7_adding_modifications/files/force_fields/charmm/mods.ff @@ -0,0 +1,15 @@ +[ modification ] +SER-phos +[ atoms ] +O1 {"element": "O", "PTM_atom": true} +O2 {"element": "O", "PTM_atom": true} +O3 {"element": "O", "PTM_atom": true} +P {"element": "P", "PTM_atom": true} +OG {"element": "O"} +HG1 {"element": "H", "replace": {"atomname": null}} +[ edges ] +OG P +OG HG1 +P O1 +P O2 +P O3 diff --git a/doc/source/tutorials/7_adding_modifications/files/force_fields/martini3001/modification.ff b/doc/source/tutorials/7_adding_modifications/files/force_fields/martini3001/modification.ff new file mode 100644 index 000000000..bc9fe67f7 --- /dev/null +++ b/doc/source/tutorials/7_adding_modifications/files/force_fields/martini3001/modification.ff @@ -0,0 +1,10 @@ +[ modification ] +SER-PO4 +[ atoms ] +BB {"PTM_atom": false} +SC1 {"PTM_atom": false, "resname": "SER", "replace": {"atype": "Q5n", "charge": -1}} +[ edges ] +BB SC1 +[ bonds ] +BB SC1 1 0.33 5000 + diff --git a/doc/source/tutorials/7_adding_modifications/files/mappings/SEP.mapping b/doc/source/tutorials/7_adding_modifications/files/mappings/SEP.mapping new file mode 100644 index 000000000..2ce608977 --- /dev/null +++ b/doc/source/tutorials/7_adding_modifications/files/mappings/SEP.mapping @@ -0,0 +1,45 @@ +[ modification ] +[ from ] +charmm +[ to ] +martini3001 +[ from blocks ] +SER-phos +[ to blocks ] +SER-PO4 +[ from nodes ] +N +HN +CA +HA +C +O +CB +HB1 +HB2 +[ from edges ] +N HN +N CA +CA HA +CA C +C O +CA CB +CB HB1 +CB HB2 +CB OG +[ mapping ] +N BB +HN BB +CA BB +HA BB 0 +C BB +O BB +CB BB +CB SC1 +HB1 SC1 0 +HB2 SC1 0 +OG SC1 +P SC1 +O1 SC1 +O2 SC1 +O3 SC1 diff --git a/doc/source/tutorials/7_adding_modifications/index.rst b/doc/source/tutorials/7_adding_modifications/index.rst index 108245c78..72fb6c3d9 100644 --- a/doc/source/tutorials/7_adding_modifications/index.rst +++ b/doc/source/tutorials/7_adding_modifications/index.rst @@ -1,2 +1,206 @@ Adding new modifications -======================== \ No newline at end of file +======================== +In :doc:`/tutorials/6_adding_residues_links/index` we added a whole new +:ref:`data:Block` in order to describe a phosphoserine residue. +This had the (dis)advantage that we had to redefine all the default serine +interactions and parameters as well as the inter-residue +:ref:`links `. There must be a better way! + +Fortunately there is. Rather than describing a whole new SEP +:ref:`data:Block` and all that entails we can instead describe +just the way the phosphorylation *modified* the normal SER residue. This is +exactly what :ref:`modifications ` are for. As before, we +need to add the new modification in 3 places: input force field, output force +field, and mapping between the two. Note that the parameters +presented here are for demonstration purposes only and not fit for actual +science or simulations! + +All input files for this tutorial can be found on `github `_. + +The input force field +--------------------- +During :ref:`repair ` the regular SER atoms +will be repaired, the missing hydrogen (HG) will be added (!), and the phosphate +atoms will be annotated as being "nonstandard". During +:ref:`martinize2_workflow:Identify modifications` the :ref:`processors:Processor` +will try to identify these tagged atoms by finding a minimal set of +modifications that describe all relevant atoms. For a modification to apply +here it must be subgraph isomorphic to the input structure. + +If we run ``martinize2 -f ala-sep-ala.pdb -o topol.top -x AJA.pdb`` we get, as +expected, the warning that not all modifications could be identified:: + + WARNING - unknown-input - Could not identify the modifications for residues ['SER3'], involving atoms ['21-O1', '22-O2', '23-O3', '24-P'] + +So let's define the modification in ``force_fields/charmm/modification.ff``:: + + ; THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. + [ modification ] + SER-phos + [ atoms ] + O1 {"element": "O", "PTM_atom": true} + O2 {"element": "O", "PTM_atom": true} + O3 {"element": "O", "PTM_atom": true} + P {"element": "P", "PTM_atom": true} + OG {"element": "O"} + HG {"element": "H", "replace": {"atomname": null}} + [ edges ] + OG P + P O1 + P O2 + P O3 + +As before, the input force field does not define any MD parameters or +interactions. This modification contains nodes and edges. The edges are not +very interesting, and just define the connections between nodes. Nodes on the +other hand define 2 things: 1) the atom name as is should be (first column), +and 2) any constraints the node must satisfy during the subgraph isomorphism +as a JSON formatted mapping. The constraints should define at least 2 +properties: the ``element``, and ``PTM_atom``. The element property is +self explanatory, but the ``PTM_atom`` needs more explanation. + +Modifications contain *2* types of nodes: + +#. Nodes that are already described by the parent block (``PTM_atom`` is false, this is the default). We call these nodes "anchors". +#. Nodes that are not yet described by the parent block (``PTM_atom`` is true). + +In addition, it's worth noting that :ref:`repair ` +reconstructed the HG atom (see ``-write-repair``) since it's not in the input +PDB. We use the "replace" property to describe all node attributes that need +to change because of this modification. In this case we indicate that this atom +should be removed again, by setting its atomname to "null". You can use the +"replace" property to change *any* node property, including *e.g.* atom type +and charge. + +The output force field +---------------------- +We have to add a similar modification for the output force field in +``force_fields/martini3001/modification.ff``:: + + [ modification ] + ; THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. + SER-PO4 + [ atoms ] + BB {"PTM_atom": false} + SC1 {"PTM_atom": false, "resname": "SER", "replace": {"atype": "Q5n", "charge": -1}} + [ bonds ] + BB SC1 1 0.33 5000 + +Nothing new here compared to the modification for the input force field. Note +that here we *do* define the simulation parameters, and we define a bond. + +The mapping +----------- +Finally, we need to add the mapping describing how to get from charmm to +martini3001 in ``mappings/SEP.mapping``:: + + ; THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. + [ modification ] + [ from ] + charmm + [ to ] + martini3001 + [ from blocks ] + SER-phos + [ to blocks ] + SER-PO4 + [ from nodes ] + N + HN + CA + HA + C + O + CB + HB1 + HB2 + [ from edges ] + N HN + N CA + CA HA + CA C + C O + CA CB + CB HB1 + CB HB2 + CB OG + [ mapping ] + N BB + HN BB + CA BB + HA BB 0 + C BB + O BB + CB BB + CB SC1 + HB1 SC1 0 + HB2 SC1 0 + OG SC1 + P SC1 + O1 SC1 + O2 SC1 + O3 SC1 + +Firstly, notice that this is a different file format than the backwards format +we used before. In this case we have to define between which force fields we're +going to define a mapping (``charmm`` and ``martini3001``), and between which +modifications (or blocks) (``SER-phos`` and ``SER-PO4``). This mapping has to +define how to map the phosphate moiety (at least). This moiety will be mapped +to the SC1 bead, so we will need to describe the complete mapping for that bead. +In addition this mapping affects the mapping of the BB bead (since CB will now +also contribute in part to it). + +The charmm modification already define some nodes (see above), but not all the +nodes required to describe the complete mapping for the BB and SC1 nodes, so +these need to be described under `from nodes` and `from edges`. Finally, the +actual mapping section should be self explanatory. + +Now if we run ``martinize2 -f ala-sep-ala.pdb -x AJA.pdb -o topol.top -ff-dir force_fields/ -map-dir mappings/`` +we see ``INFO - general - Applying modification mapping ('SER-phos',)`` + +Now we need to check the produced itp file:: + + ; THESE PARAMETERS ARE FOR DEMONSTRATION PURPOSES ONLY. DO NOT USE. + [ atoms ] + 1 Q5 1 ALA BB 1 1 + 2 TC3 1 ALA SC1 2 0.0 + 3 P2 2 SER BB 3 0.0 + 4 Q5n 2 SER SC1 4 -1 + 5 Q5 3 ALA BB 5 -1 + 6 TC3 3 ALA SC1 6 0.0 + + [ bonds ] + 3 4 1 0.33 5000 + + ; Backbone bonds + 1 3 1 0.350 4000 + 3 5 1 0.350 4000 + + #ifdef FLEXIBLE + ; Side chain bonds + 1 2 1 0.270 1000000 + 5 6 1 0.270 1000000 + #endif + + [ constraints ] + #ifndef FLEXIBLE + ; Side chain bonds + 1 2 1 0.270 + 5 6 1 0.270 + #endif + + [ angles ] + ; BBB angles + 1 3 5 10 127 20 + + ; BBS angles regular martini + 1 3 4 2 100 25 + 3 5 6 2 100 25 + + ; First SBB regular martini + 2 1 3 2 100 25 + +What we see here is that the atom type and bond we specified in the +modification have been applied, and we can also no longer see the BB-SC1 bond +that comes with the normal serine residue (``BB SC1 1 0.287 7500``) is no +longer present. In addition, we find the usual backbone/protein interactions. diff --git a/doc/source/tutorials/index.rst b/doc/source/tutorials/index.rst index 668725d3b..688d590ce 100644 --- a/doc/source/tutorials/index.rst +++ b/doc/source/tutorials/index.rst @@ -1,15 +1,11 @@ Tutorials ========= -Here we will list all tutorials for vermouth and Martinize 2. +You can find some examples on how to use martinize 2 in the martinize-examples +repository: https://github.com/marrink-lab/martinize-examples .. Organize the tutorials in directories, so it's easier to keep their files together .. toctree:: - 1_simple_protein_aa/index - 2_simple_protein_cg/index - 3_membrane_protein/index - 4_branched_polymer/index - 5_glycosylated_protein/index 6_adding_residues_links/index 7_adding_modifications/index diff --git a/vermouth/map_parser.py b/vermouth/map_parser.py index 26c7ed89b..95460372e 100644 --- a/vermouth/map_parser.py +++ b/vermouth/map_parser.py @@ -685,7 +685,7 @@ def _blocks(self, line, lineno=0, direction=None, map_type=None): identifier := string that does not start with '!' res_attrs := json dict An "!" signifies that no block should be fetched based on the - the resname automatically. + resname automatically. Identifier *must* be unique for this mapping/direction. diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py index eaf3023d3..072e439d3 100644 --- a/vermouth/processors/do_mapping.py +++ b/vermouth/processors/do_mapping.py @@ -477,7 +477,7 @@ def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol): assert len(atoms) == len(interaction.atoms) interaction = interaction._replace(atoms=atoms) applied_interactions[interaction_type][tuple(atoms)].append(modification) - graph_out.add_interaction(interaction_type, **interaction._asdict()) + graph_out.add_or_replace_interaction(interaction_type, **interaction._asdict()) # Some jank needed here, since modification node indices are integers mod_atom_name_to_out = {}