diff --git a/polyply/src/big_smile_mol_processor.py b/polyply/src/big_smile_mol_processor.py
new file mode 100644
index 00000000..1801a437
--- /dev/null
+++ b/polyply/src/big_smile_mol_processor.py
@@ -0,0 +1,143 @@
+import re
+import networkx as nx
+import pysmiles
+from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
+                                           force_field_from_fragments)
+from polyply.src.map_to_molecule import MapToMolecule
+
+VALENCES = pysmiles.smiles_helper.VALENCES
+VALENCES.update({"H": (1,)})
+
+def compatible(left, right):
+    """
+    Check bonding descriptor compatibility according
+    to the BigSmiles syntax conventions.
+
+    Parameters
+    ----------
+    left: str
+    right: str
+
+    Returns
+    -------
+    bool
+    """
+    if left == right and left not in '> <':
+        return True
+    l, r = left[0], right[0]
+    if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
+        return left[1:] == right[1:]
+    return False
+
+def generate_edge(source, target, bond_attribute="bonding"):
+    """
+    Given a source and a target graph, which have bonding
+    descriptors stored as node attributes, find a pair of
+    matching descriptors and return the respective nodes.
+    The function also returns the bonding descriptors. If
+    no matching pair of bonding descriptors is found, a
+    LookupError is raised.
+
+    Parameters
+    ----------
+    source: :class:`nx.Graph`
+    target: :class:`nx.Graph`
+    bond_attribute: `abc.hashable`
+        the node attribute under which the bonding
+        descriptors are stored
+
+    Returns
+    -------
+    ((abc.hashable, abc.hashable), (str, str))
+        the nodes as well as the bonding descriptors
+
+    Raises
+    ------
+    LookupError
+        if no match is found
+    """
+    source_nodes = nx.get_node_attributes(source, bond_attribute)
+    target_nodes = nx.get_node_attributes(target, bond_attribute)
+    for source_node in source_nodes:
+        for target_node in target_nodes:
+            #print(source_node, target_node)
+            bond_sources = source_nodes[source_node]
+            bond_targets = target_nodes[target_node]
+            for bond_source in bond_sources:
+                for bond_target in bond_targets:
+                    #print(bond_source, bond_target)
+                    if compatible(bond_source, bond_target):
+                        return ((source_node, target_node), (bond_source, bond_target))
+    raise LookupError
+
+class DefBigSmileParser:
+    """
+    Parse a string instance of a defined BigSmile,
+    which describes a polymer molecule.
+    """
+
+    def __init__(self, force_field):
+        self.force_field = force_field
+        self.meta_molecule = None
+        self.molecule = None
+
+    def edges_from_bonding_descrpt(self):
+        """
+        Make edges according to the bonding descriptors stored
+        in the node attributes of the meta_molecule residue graph.
+        When a bonding descriptor is consumed it is removed from the
+        list; however, the meta_molecule edge gets an attribute with
+        the bonding descriptors that formed the edge. Unconsumed
+        bonding descriptors are later replaced by hydrogen atoms.
+        """
+        for prev_node, node in nx.dfs_edges(self.meta_molecule):
+            prev_graph = self.meta_molecule.nodes[prev_node]['graph']
+            node_graph = self.meta_molecule.nodes[node]['graph']
+            edge, bonding = generate_edge(prev_graph,
+                                          node_graph)
+            # this is a bit of a workaround because at this stage the
+            # bonding list is actually shared between all residues of
+            # the same type; so we first make a copy and then replace
+            # the list without the used bonding descriptor
+            prev_bond_list = prev_graph.nodes[edge[0]]['bonding'].copy()
+            prev_bond_list.remove(bonding[0])
+            prev_graph.nodes[edge[0]]['bonding'] = prev_bond_list
+            node_bond_list = node_graph.nodes[edge[1]]['bonding'].copy()
+            node_bond_list.remove(bonding[1])
+            node_graph.nodes[edge[1]]['bonding'] = node_bond_list
+            self.meta_molecule.molecule.add_edge(edge[0], edge[1], bonding=bonding)
+
+    def replace_unconsumed_bonding_descrpt(self):
+        """
+        We allow multiple bonding descriptors per atom, which,
+        however, are not always consumed. In this case the leftover
+        bonding descriptors are replaced by hydrogen atoms.
+        """
+        for node in self.meta_molecule.nodes:
+            graph = self.meta_molecule.nodes[node]['graph']
+            bonding = nx.get_node_attributes(graph, "bonding")
+            for node, bondings in bonding.items():
+                element = graph.nodes[node]['element']
+                hcount = VALENCES[element][0] -\
+                         self.meta_molecule.molecule.degree(node) + 1
+                attrs = {attr: graph.nodes[node][attr] for attr in ['resname', 'resid']}
+                attrs['element'] = 'H'
+                for new_id in range(1, hcount):
+                    new_node = len(self.meta_molecule.molecule.nodes) + 1
+                    graph.add_edge(node, new_node)
+                    attrs['atomname'] = "H" + str(new_id + len(graph.nodes))
+                    graph.nodes[new_node].update(attrs)
+                    self.meta_molecule.molecule.add_edge(node, new_node)
+                    self.meta_molecule.molecule.nodes[new_node].update(attrs)
+
+    def parse(self, big_smile_str):
+        res_pattern, residues = re.findall(r"\{[^\}]+\}", big_smile_str)
+        self.meta_molecule = res_pattern_to_meta_mol(res_pattern)
+        self.force_field = force_field_from_fragments(residues)
+        MapToMolecule(self.force_field).run_molecule(self.meta_molecule)
+        self.edges_from_bonding_descrpt()
+        self.replace_unconsumed_bonding_descrpt()
+        return self.meta_molecule
+
+# ToDo
+# - clean copying of bond-list attributes
diff --git a/polyply/src/big_smile_parsing.py b/polyply/src/big_smile_parsing.py
new file mode 100644
index 00000000..ec136bea
--- /dev/null
+++ b/polyply/src/big_smile_parsing.py
@@ -0,0 +1,367 @@
+from collections import defaultdict
+import re
+import numpy as np
+try:
+    import pysmiles
+except ImportError as error:
+    msg = ("You are using functionality that requires "
+           "the pysmiles package. Use pip install pysmiles.")
+    raise ImportError(msg) from error
+import networkx as nx
+from vermouth.forcefield import ForceField
+from vermouth.molecule import Block
+from polyply.src.meta_molecule import MetaMolecule
+
+PATTERNS = {"bond_anchor": r"\[\$.*?\]",
+            "place_holder": r"\[\#.*?\]",
+            "annotation": r"\|.*?\|",
+            "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
+            "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}
+
+def _find_next_character(string, chars, start):
+    for idx, token in enumerate(string[start:]):
+        if token in chars:
+            return idx+start
+    return np.inf
+
+def _expand_branch(meta_mol, current, anchor, recipe):
+    prev_node = anchor
+    for bdx, (resname, n_mon) in enumerate(recipe):
+        if bdx == 0:
+            anchor = current
+        for _ in range(0, n_mon):
+            connection = [(prev_node, current)]
+            meta_mol.add_monomer(current,
+                                 resname,
+                                 connection)
+            prev_node = current
+            current += 1
+        prev_node = anchor
+    return meta_mol, current, prev_node
+
+def res_pattern_to_meta_mol(pattern):
+    """
+    Generate a :class:`polyply.MetaMolecule` from a
+    pattern string describing a residue graph with the
+    simplified big-smile syntax.
+
+    The syntax scheme consists of two curly braces
+    enclosing the residue graph sequence. It can contain
+    any enumeration of residues by writing them as if they
+    were SMILES atoms, but the atom name is given by # + resname.
+    This input format can handle branching as well; however,
+    macrocycles are currently not supported.
+
+    General Pattern
+    '{' + [#resname_1][#resname_2]... + '}'
+
+    In addition to plain enumeration any residue may be
+    followed by a '|' and an integer number that
+    specifies how many times the given residue should
+    be added within a sequence. For example, a pentamer
+    of PEO can be written as:
+
+    {[#PEO][#PEO][#PEO][#PEO][#PEO]}
+
+    or
+
+    {[#PEO]|5}
+
+    The block syntax also applies to branches. Here the convention
+    is that the complete branch including its first anchoring
+    residue is repeated. For example, to generate a PMA-g-PEG
+    polymer containing 15 residues the following syntax is permitted:
+
+    {[#PMA]([#PEO][#PEO])|5}
+
+    Parameters
+    ----------
+    pattern: str
+        a string describing the meta-molecule
+
+    Returns
+    -------
+    :class:`polyply.MetaMolecule`
+    """
+    meta_mol = MetaMolecule()
+    current = 0
+    # stores one or more branch anchors; each next
+    # anchor belongs to a nested branch
+    branch_anchor = []
+    # used for storing the composition protocol of
+    # branches; each entry is a list of branches
+    # extending from the anchor point
+    recipes = defaultdict(list)
+    # the previous node
+    prev_node = None
+    # do we have an open branch
+    branching = False
+    # each element in the for loop matches a pattern
+    # '[' + '#' + some alphanumeric name + ']'
+    for match in re.finditer(PATTERNS['place_holder'], pattern):
+        start, stop = match.span()
+        # we start a new branch when the residue is preceded by '('
+        # as in ... ([#PEO] ...
+        if pattern[start-1] == '(':
+            branching = True
+            branch_anchor.append(prev_node)
+            # the recipe for making the branch includes the anchor, which
+            # is hence the first residue in the list
+            recipes[branch_anchor[-1]] = [(meta_mol.nodes[prev_node]['resname'], 1)]
+        # here we check if the atom is followed by an expansion character '|'
+        # as in ... [#PEO]|
+        if stop < len(pattern) and pattern[stop] == '|':
+            # eon => end of next
+            # we find the next character that starts a new residue, ends a branch or
+            # ends the complete pattern
+            eon = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
+            # between the expansion character and the eon character
+            # is any number that corresponds to the number of times (i.e. monomers)
+            # that this atom should be added
+            n_mon = int(pattern[stop+1:eon])
+        else:
+            n_mon = 1
+
+        # the resname starts at the second character and ends
+        # one before the last according to the above pattern
+        resname = match.group(0)[2:-1]
+        # if this residue is part of a branch we store it in
+        # the recipe dict together with the anchor residue
+        # and expansion number
+        if branching:
+            recipes[branch_anchor[-1]].append((resname, n_mon))
+
+        # now we add the new residue as often as required
+        connection = []
+        for _ in range(0, n_mon):
+            if prev_node is not None:
+                connection = [(prev_node, current)]
+            meta_mol.add_monomer(current,
+                                 resname,
+                                 connection)
+            prev_node = current
+            current += 1
+
+        # here we check if the residue considered before is the
+        # last residue of a branch (i.e. '...[#residue])');
+        # that is the case if the branch closure comes before
+        # any new atom begins
+        branch_stop = _find_next_character(pattern, ['['], stop) >\
+                      _find_next_character(pattern, [')'], stop)
+
+        # if the branch ends we reset the anchor
+        # and set branching False unless we are in
+        # a nested branch
+        if stop <= len(pattern) and branch_stop:
+            branching = False
+            prev_node = branch_anchor.pop()
+            if branch_anchor:
+                branching = True
+            #========================================
+            #       expansion for branches
+            #========================================
+            # We need to know how often the branch has
+            # to be added so we first identify the branch
+            # terminal character ')' called eon_a.
+            eon_a = _find_next_character(pattern, [')'], stop)
+            # Then we check if the expansion character
+            # is next.
+            if stop+1 < len(pattern) and pattern[eon_a+1] == "|":
+                # If there is one we find the beginning
+                # of the next branch, residue or end of the string.
+                # As before all characters in between are a number that
+                # is how often the branch is expanded.
+                eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1)
+                # the outermost loop goes over how often the branch has to be
+                # added to the existing sequence
+                for idx in range(0, int(pattern[eon_a+2:eon_b])-1):
+                    prev_anchor = None
+                    skip = 0
+                    # in principle each branch can contain any number of nested branches;
+                    # each branch is itself a recipe that has an anchor atom
+                    for ref_anchor, recipe in list(recipes.items())[len(branch_anchor):]:
+                        # starting from the first nested branch we have to do some
+                        # math to find the anchor atom relative to the first branch;
+                        # we also skip the first residue in recipe, which is the
+                        # anchor residue. Only the outermost branch in an expansion
+                        # is expanded including the anchor. This allows easy description
+                        # of graft polymers.
+                        if prev_anchor:
+                            offset = ref_anchor - prev_anchor
+                            prev_node = prev_node + offset
+                            skip = 1
+                        # this function simply adds the residues of the particular
+                        # branch
+                        meta_mol, current, prev_node = _expand_branch(meta_mol,
+                                                                      current=current,
+                                                                      anchor=prev_node,
+                                                                      recipe=recipe[skip:])
+                        # if this is the first branch we want to set the anchor
+                        # as the base anchor to which we jump back after all nested
+                        # branches have been added
+                        if prev_anchor is None:
+                            base_anchor = prev_node
+                        # store the previous anchor so we can do the math for nested
+                        # branches
+                        prev_anchor = ref_anchor
+                    # all branches added; then go back to the base anchor
+                    prev_node = base_anchor
+            # if all branches are done we need to reset the lists
+            # when all nested branches are completed
+            if len(branch_anchor) == 0:
+                recipes = defaultdict(list)
+    return meta_mol
+
+def tokenize_big_smile(big_smile):
+    """
+    Processes a BigSmile string by storing the
+    BigSmile-specific bonding descriptors
+    in a dict with reference to the atom they
+    refer to. Furthermore, a cleaned SMILES
+    string is generated with the BigSmile
+    specific syntax removed.
+
+    Parameters
+    ----------
+    big_smile: str
+        a BigSmile smiles string
+
+    Returns
+    -------
+    str
+        a canonical smiles string
+    dict
+        a dict mapping bonding descriptors
+        to the nodes within the smiles string
+    """
+    smile_iter = iter(big_smile)
+    bonding_descrpt = defaultdict(list)
+    smile = ""
+    node_count = 0
+    prev_node = 0
+    for token in smile_iter:
+        if token == '[':
+            peek = next(smile_iter)
+            if peek in ['$', '>', '<']:
+                bond_descrp = peek
+                peek = next(smile_iter)
+                while peek != ']':
+                    bond_descrp += peek
+                    peek = next(smile_iter)
+                bonding_descrpt[prev_node].append(bond_descrp)
+            else:
+                smile = smile + token + peek
+                prev_node = node_count
+                node_count += 1
+
+        elif token == '(':
+            anchor = prev_node
+            smile += token
+        elif token == ')':
+            prev_node = anchor
+            smile += token
+        else:
+            if token not in '] H @ . - = # $ : / \\ + - %'\
+            and not token.isdigit():
+                prev_node = node_count
+                node_count += 1
+            smile += token
+    return smile, bonding_descrpt
+
+def _rebuild_h_atoms(mol_graph):
+    # special workaround to fix a
+    # pysmiles bug for a single
+    # atom molecule; we assume that the
+    # hcount is just wrong and set it to
+    # the valence number minus bonds minus
+    # bonding connectors
+    if len(mol_graph.nodes) == 1:
+        ele = mol_graph.nodes[0]['element']
+        # for N and P we assume the regular valency
+        hcount = pysmiles.smiles_helper.VALENCES[ele][0]
+        if mol_graph.nodes[0].get('bonding', False):
+            hcount -= 1
+        mol_graph.nodes[0]['hcount'] = hcount
+    else:
+        for node in mol_graph.nodes:
+            if mol_graph.nodes[node].get('bonding', False):
+                # get the element of the node
+                ele = mol_graph.nodes[node]['element']
+                # hcount is the valence minus the degree minus
+                # the number of bonding descriptors
+                hcount = pysmiles.smiles_helper.VALENCES[ele][0] -\
+                         mol_graph.degree(node) -\
+                         len(mol_graph.nodes[node]['bonding'])
+
+                mol_graph.nodes[node]['hcount'] = hcount
+
+    pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
+    return mol_graph
+
+def fragment_iter(fragment_str):
+    """
+    Iterates over fragments defined in a BigSmile string.
+    Fragments are named residues that consist of a single
+    SMILES string together with the BigSmile-specific bonding
+    descriptors. The function returns the resname of a named
+    fragment as well as a plain nx.Graph of the molecule
+    described by the SMILES string. Bonding descriptors are
+    annotated as node attributes with the keyword bonding.
+
+    Parameters
+    ----------
+    fragment_str: str
+        the string describing the fragments
+
+    Yields
+    ------
+    str, nx.Graph
+    """
+    for fragment in fragment_str[1:-1].split(','):
+        delim = fragment.find('=', 0)
+        resname = fragment[1:delim]
+        big_smile = fragment[delim+1:]
+        smile, bonding_descrpt = tokenize_big_smile(big_smile)
+
+        if smile == "H":
+            mol_graph = nx.Graph()
+            mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
+            nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+        else:
+            mol_graph = pysmiles.read_smiles(smile)
+            nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+            # we need to rebuild the hydrogen atoms now
+            _rebuild_h_atoms(mol_graph)
+
+        atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)}
+        nx.set_node_attributes(mol_graph, atomnames, 'atomname')
+        nx.set_node_attributes(mol_graph, resname, 'resname')
+        yield resname, mol_graph
+
+def force_field_from_fragments(fragment_str):
+    """
+    Collects the fragments defined in a BigSmile string
+    as :class:`vermouth.molecule.Block` objects in a
+    force-field object. Bonding descriptors are annotated
+    as node attributes.
+
+    Parameters
+    ----------
+    fragment_str: str
+        string using BigSmile fragment syntax
+
+    Returns
+    -------
+    :class:`vermouth.forcefield.ForceField`
+    """
+    force_field = ForceField("big_smile_ff")
+    frag_iter = fragment_iter(fragment_str)
+    for resname, mol_graph in frag_iter:
+        mol_block = Block(mol_graph)
+        force_field.blocks[resname] = mol_block
+    return force_field
+
+# ToDos
+# - remove special case hydrogen line 327ff
+# - check rebuild_h and clean up
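For orientation, a small usage sketch of the helpers added in this file; the input strings mirror those used in the new tests and the indicated outputs are what those tests imply, assuming polyply, pysmiles and vermouth are installed:

```python
import networkx as nx
from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
                                           tokenize_big_smile,
                                           force_field_from_fragments)

# Residue-level pattern: {[#PEO]|5} expands into a linear PEO pentamer.
meta_mol = res_pattern_to_meta_mol("{[#PEO]|5}")
print(len(meta_mol.nodes))                          # 5
print(nx.get_node_attributes(meta_mol, "resname"))  # {0: 'PEO', ..., 4: 'PEO'}

# Atom-level fragment: bonding descriptors are stripped from the SMILES
# string and reported per atom index.
smile, bonding = tokenize_big_smile("[$]COC[$]")
print(smile)          # 'COC'
print(dict(bonding))  # {0: ['$'], 2: ['$']}

# Named fragments become vermouth Blocks collected in a force field.
force_field = force_field_from_fragments("{#PEO=[$]COC[$],#OHter=[$][O]}")
print(list(force_field.blocks))  # ['PEO', 'OHter']
```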
("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O].[Na+]}", + # 0 1 2 3 4 5 6 7 8 + [('OHter', 'O Na'), ('PEO', 'C O C H H H H'), + # 9 10 11 12 13 14 15 16 17 + ('PEO', 'C O C H H H H'), ('OHter', 'O Na')], + [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7), + (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13), + (11, 14), (11, 15), (11, 16), (16, 17)]), + + # uncomsumed bonding IDs; note that this is not the same + # molecule as previous test case. Here one of the OH branches + # and replaces an CH2 group with CH-OH + ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[>][$1]COC[<],#OHter=[$1][O]}", + [('OHter', 'O H'), ('PEO', 'C O C H H H H'), + ('PEO', 'C O C H H H H'), ('OHter', 'O H')], + [(0, 1), (0, 2), (2, 3), (2, 5), (2, 10), (3, 4), + (4, 6), (4, 7), (4, 17), (8, 9), (8, 11), (8, 14), + (8, 18), (9, 10), (10, 12), (10, 13), (14, 15)]), + # simple branched sequence + ("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}", + [('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'), + ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'), ('Hter', 'H')], + [(0, 1), (1, 2), (1, 3), (1, 4), (2, 5), (2, 6), (2, 14), (6, 7), (6, 9), (6, 10), (7, 8), + (8, 11), (8, 12), (8, 13), (14, 15), (14, 16), (14, 17), (15, 18), (15, 19), (15, 27), + (19, 20), (19, 22), (19, 23), (20, 21), (21, 24), (21, 25), (21, 26)]), + # something with a ring + # 012 34567 + # 890123456 + ("{[#Hter][#PS]|2[#Hter]}.{#PS=[$]CC[$]c1ccccc1,#Hter=[$]H}", + [('Hter', 'H'), ('PS', 'C C C C C C C C H H H H H H H H'), + ('PS', 'C C C C C C C C H H H H H H H H'), ('Hter', 'H')], + [(0, 1), (1, 2), (1, 9), (1, 10), (2, 3), (2, 11), (2, 17), + (3, 4), (3, 8), (4, 5), (4, 12), (5, 6), (5, 13), (6, 7), + (6, 14), (7, 8), (7, 15), (8, 16), (17, 18), (17, 25), + (17, 26), (18, 19), (18, 27), (18, 33), (19, 20), (19, 24), + (20, 21), (20, 28), (21, 22), (21, 29), (22, 23), (22, 30), + (23, 24), (23, 31), (24, 32)]), + +)) +def test_def_big_smile_parser(smile, ref_nodes, ref_edges): + ff = ForceField("new") + meta_mol = DefBigSmileParser(ff).parse(smile) +# nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element')) +# plt.show() + for node, ref in zip(meta_mol.nodes, ref_nodes): + assert meta_mol.nodes[node]['resname'] == ref[0] + block_graph = meta_mol.nodes[node]['graph'] + elements = list(nx.get_node_attributes(block_graph, 'element').values()) + assert elements == ref[1].split() + assert sorted(meta_mol.molecule.edges) == sorted(ref_edges) diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py new file mode 100644 index 00000000..5c1491b8 --- /dev/null +++ b/polyply/tests/test_big_smile_parsing.py @@ -0,0 +1,230 @@ +import pytest +import networkx as nx +from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol, + tokenize_big_smile, + fragment_iter) + +@pytest.mark.parametrize('smile, nodes, edges',( + # smiple linear seqeunce + ("{[#PMA][#PEO][#PMA]}", + ["PMA", "PEO", "PMA"], + [(0, 1), (1, 2)]), + # simple branched sequence + ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}", + ["PMA", "PMA", "PEO", "PEO", "PMA"], + [(0, 1), (1, 2), (2, 3), (1, 4)]), + # simple sequence two branches + ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}", + ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"], + [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)]), + # simple linear sequence with expansion + ("{[#PMA]|3}", + ["PMA", "PMA", "PMA"], + [(0, 1), (1, 2)]), + # simple 
diff --git a/polyply/tests/test_big_smile_parsing.py b/polyply/tests/test_big_smile_parsing.py
new file mode 100644
index 00000000..5c1491b8
--- /dev/null
+++ b/polyply/tests/test_big_smile_parsing.py
@@ -0,0 +1,230 @@
+import pytest
+import networkx as nx
+from polyply.src.big_smile_parsing import (res_pattern_to_meta_mol,
+                                           tokenize_big_smile,
+                                           fragment_iter)
+
+@pytest.mark.parametrize('smile, nodes, edges',(
+    # simple linear sequence
+    ("{[#PMA][#PEO][#PMA]}",
+     ["PMA", "PEO", "PMA"],
+     [(0, 1), (1, 2)]),
+    # simple branched sequence
+    ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}",
+     ["PMA", "PMA", "PEO", "PEO", "PMA"],
+     [(0, 1), (1, 2), (2, 3), (1, 4)]),
+    # simple sequence two branches
+    ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}",
+     ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"],
+     [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)]),
+    # simple linear sequence with expansion
+    ("{[#PMA]|3}",
+     ["PMA", "PMA", "PMA"],
+     [(0, 1), (1, 2)]),
+    # simple branch expansion
+    ("{[#PMA]([#PEO][#PEO][#OHter])|3}",
+     ["PMA", "PEO", "PEO", "OHter",
+      "PMA", "PEO", "PEO", "OHter",
+      "PMA", "PEO", "PEO", "OHter"],
+     [(0, 1), (1, 2), (2, 3),
+      (0, 4), (4, 5), (5, 6), (6, 7),
+      (4, 8), (8, 9), (9, 10), (10, 11)]
+    ),
+    # nested branch with expansion
+    ("{[#PMA]([#PEO]|3)|2}",
+     ["PMA", "PEO", "PEO", "PEO",
+      "PMA", "PEO", "PEO", "PEO"],
+     [(0, 1), (1, 2), (2, 3),
+      (0, 4), (4, 5), (5, 6), (6, 7)]
+    ),
+    # nested branching
+    # 0 1 2 3 4 5 6
+    ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])[#PMA]}",
+     ["PMA", "PMA", "PEO", "PEO", "OH",
+      "PEO", "PMA"],
+     [(0, 1), (1, 2), (2, 3),
+      (3, 4), (3, 5), (1, 6)]
+    ),
+    # nested branching plus expansion
+    # 0 1 2 3 4/5 6 7
+    ("{[#PMA][#PMA]([#PEO][#PEO]([#OH]|2)[#PEO])[#PMA]}",
+     ["PMA", "PMA", "PEO", "PEO", "OH", "OH",
+      "PEO", "PMA"],
+     [(0, 1), (1, 2), (2, 3),
+      (3, 4), (4, 5), (3, 6), (1, 7)]
+    ),
+    # nested branching plus expansion incl. branch
+    # 0 1 2 3 4  5
+    # 6 7 8 9 10 11
+    ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])|2[#PMA]}",
+     ["PMA", "PMA", "PEO", "PEO", "OH", "PEO",
+      "PMA", "PEO", "PEO", "PEO", "OH", "PMA"],
+     [(0, 1), (1, 2), (2, 3),
+      (3, 4), (3, 5), (1, 6), (6, 7), (7, 8),
+      (8, 9), (8, 10), (6, 11)]
+    ),
+    # nested branching plus expansion of the nested branch
+    # here the nested branch is expanded
+    #  0 - 1 - 10
+    #      |
+    #      2
+    #      |
+    #      3 {- 5 - 7 } - 9 -> the expanded fragment
+    #      |    |   |
+    #      4    6   8
+    ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]}",
+     ["PMA", "PMA", "PEO", "PQ", "OH",
+      "PQ", "OH", "PQ", "OH", "PEO", "PMA"],
+     [(0, 1), (1, 2), (1, 10),
+      (2, 3), (3, 4), (3, 5), (5, 6),
+      (5, 7), (7, 8), (7, 9)]
+    ),
+    # nested branching plus expansion of the nested branch;
+    # here the nested branch is expanded and a complete
+    # new branch is added
+    #      11        13
+    #      |         |
+    #  0 - 1 - 10 - 12
+    #      |
+    #      2
+    #      |
+    #      3 {- 5 - 7 } - 9 -> the expanded fragment
+    #      |    |   |
+    #      4    6   8
+    ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]([#CH3])|2}",
+     ["PMA", "PMA", "PEO", "PQ", "OH",
+      "PQ", "OH", "PQ", "OH", "PEO", "PMA", "CH3", "PMA", "CH3"],
+     [(0, 1), (1, 2), (1, 10),
+      (2, 3), (3, 4), (3, 5), (5, 6),
+      (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)]
+    ),
+))
+def test_res_pattern_to_meta_mol(smile, nodes, edges):
+    """
+    Test that the meta-molecule is correctly reproduced
+    from the simplified smiles string syntax.
+    """
+    meta_mol = res_pattern_to_meta_mol(smile)
+    assert len(meta_mol.edges) == len(edges)
+    for edge in edges:
+        assert meta_mol.has_edge(*edge)
+    resnames = nx.get_node_attributes(meta_mol, 'resname')
+    assert nodes == list(resnames.values())
+
+@pytest.mark.parametrize('big_smile, smile, bonding',(
+    # simple symmetric bonding
+    ("[$]COC[$]",
+     "COC",
+     {0: ["$"], 2: ["$"]}),
+    # simple symmetric but with explicit hydrogen
+    ("[$][CH2]O[CH2][$]",
+     "[CH2]O[CH2]",
+     {0: ["$"], 2: ["$"]}),
+    # simple symmetric bonding; multiple descriptors
+    ("[$]COC[$][$1]",
+     "COC",
+     {0: ["$"], 2: ["$", "$1"]}),
+    # named different bonding descriptors
+    ("[$1]CCCC[$2]",
+     "CCCC",
+     {0: ["$1"], 3: ["$2"]}),
+    # ring and bonding descriptors
+    ("[$1]CC[$2]C1CCCCC1",
+     "CCC1CCCCC1",
+     {0: ["$1"], 1: ["$2"]}),
+    # bonding descriptor after branch
+    ("C(COC[$1])[$2]CCC[$3]",
+     "C(COC)CCC",
+     {0: ["$2"], 3: ["$1"], 6: ["$3"]}),
+    # left right bonding descriptors
+    ("[>]COC[<]",
+     "COC",
+     {0: [">"], 2: ["<"]})
+))
+def test_tokenize_big_smile(big_smile, smile, bonding):
+    new_smile, new_bonding = tokenize_big_smile(big_smile)
+    assert new_smile == smile
+    assert new_bonding == bonding
+
+@pytest.mark.parametrize('fragment_str, nodes, edges',(
+    # single fragment
+    ("{#PEO=[$]COC[$]}",
+     {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+              (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+              (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+              (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+              (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
+             )},
+     {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}),
+    # single fragment but with explicit hydrogen in smiles
+    ("{#PEO=[$][CH2]O[CH2][$]}",
+     {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+              (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+              (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+              (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+              (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
+             )},
+     {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)]}),
+    # test NH3 terminal
+    ("{#AMM=N[$]}",
+     {"AMM": ((0, {"atomname": "N0", "resname": "AMM", "bonding": ["$"], "element": "N"}),
+              (1, {"atomname": "H1", "resname": "AMM", "element": "H"}),
+              (2, {"atomname": "H2", "resname": "AMM", "element": "H"}),
+             )},
+     {"AMM": [(0, 1), (0, 2)]}),
+    # single fragment + 1 terminal (i.e. only 1 bonding descriptor)
+    ("{#PEO=[$]COC[$],#OHter=[$][OH]}",
+     {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+              (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+              (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+              (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+              (6, {"atomname": "H6", "resname": "PEO", "element": "H"}),
+             ),
+      "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}),
+                (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
+     {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5), (2, 6)],
+      "OHter": [(0, 1)]}),
+    # single fragment + 1 terminal but multiple bonding descriptors;
+    # this adjusts the hydrogen count
+    ("{#PEO=[$]COC[$][$1],#OHter=[$][OH]}",
+     {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+              (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$", "$1"], "element": "C"}),
+              (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+              (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+              (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+             ),
+      "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}),
+                (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
+     {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)],
+      "OHter": [(0, 1)]}),
+    # single fragment + 1 terminal but multiple bonding descriptors
+    # and explicit hydrogen in the smiles string
+    ("{#PEO=[$][CH2]O[CH2][$][$1],#OHter=[$][OH]}",
+     {"PEO": ((0, {"atomname": "C0", "resname": "PEO", "bonding": ["$"], "element": "C"}),
+              (1, {"atomname": "O1", "resname": "PEO", "element": "O"}),
+              (2, {"atomname": "C2", "resname": "PEO", "bonding": ["$", "$1"], "element": "C"}),
+              (3, {"atomname": "H3", "resname": "PEO", "element": "H"}),
+              (4, {"atomname": "H4", "resname": "PEO", "element": "H"}),
+              (5, {"atomname": "H5", "resname": "PEO", "element": "H"}),
+             ),
+      "OHter": ((0, {"atomname": "O0", "resname": "OHter", "bonding": ["$"], "element": "O"}),
+                (1, {"atomname": "H1", "resname": "OHter", "element": "H"}))},
+     {"PEO": [(0, 1), (1, 2), (0, 3), (0, 4), (2, 5)],
+      "OHter": [(0, 1)]}),
+
+))
+def test_fragment_iter(fragment_str, nodes, edges):
+    for resname, mol_graph in fragment_iter(fragment_str):
+        assert len(mol_graph.nodes) == len(nodes[resname])
+        for node, ref_node in zip(mol_graph.nodes(data=True), nodes[resname]):
+            assert node[0] == ref_node[0]
+            for key in ref_node[1]:
+                assert ref_node[1][key] == node[1][key]
+        assert sorted(mol_graph.edges) == sorted(edges[resname])
diff --git a/requirements-tests.txt b/requirements-tests.txt
index 595a4902..03357910 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -4,3 +4,4 @@ pytest-cov
 pylint
 codecov
 tqdm
+pysmiles