Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sampler #17

Merged
merged 33 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
16af54a
init draft for sampler
fgrunewald Jul 9, 2024
77a58ed
make deepcopy when merging
fgrunewald Jul 9, 2024
e937860
update docstrings
fgrunewald Jul 10, 2024
aebf84c
address comments and refactor slightly
fgrunewald Jul 12, 2024
6220711
add seeds
fgrunewald Jul 12, 2024
c5ba1e3
refactor random seed
fgrunewald Jul 23, 2024
6842eb4
address style comments
fgrunewald Jul 23, 2024
f4f148a
Merge branch 'master' into sampler
fgrunewald Jul 23, 2024
6a4951d
adjust open bonds function to optionally take target nodes
fgrunewald Jul 24, 2024
eb62ea9
when annontating atomnames make meta_graph optional; otherwise go off…
fgrunewald Jul 24, 2024
705691b
the fragid for cgsmiles fragments should be 0 in agreement with the c…
fgrunewald Jul 24, 2024
6fec0bc
update function call set_atom_names_atomistic according to new args
fgrunewald Jul 24, 2024
5fbb282
refactor sample
fgrunewald Jul 24, 2024
eff05ef
add more tests
fgrunewald Jul 24, 2024
e345cc7
update handling of terminal addition
fgrunewald Jul 26, 2024
f883d19
finalize tests for init function
fgrunewald Jul 26, 2024
c662435
update sampler
fgrunewald Aug 14, 2024
1228ce7
Merge branch 'master' into sampler
fgrunewald Aug 29, 2024
0eaccc6
expose sampler
fgrunewald Aug 29, 2024
654fb9b
keep proper track of fragid when adding terminals
fgrunewald Aug 29, 2024
6dfdd6f
update sampler and change meaning of bonding operators
fgrunewald Sep 10, 2024
c378034
change meaning of bonding operators and fix in test
fgrunewald Sep 10, 2024
8f50245
change naming in sampler and update doc strings
fgrunewald Sep 10, 2024
efa1a8a
update docstring
fgrunewald Sep 10, 2024
ccd2901
address comments
fgrunewald Sep 10, 2024
716ebe8
update docstring
fgrunewald Sep 10, 2024
d241a96
fix doscstrings
fgrunewald Sep 12, 2024
286a161
fix doscstrings
fgrunewald Sep 12, 2024
4c062bb
fix doscstrings and spelling
fgrunewald Sep 12, 2024
d9e6a32
typo sphinx link
fgrunewald Sep 16, 2024
7d49154
update tests
fgrunewald Sep 18, 2024
4993722
update tests and remove print
fgrunewald Sep 18, 2024
9cb3a45
Update cgsmiles/sample.py
fgrunewald Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 39 additions & 6 deletions cgsmiles/cgsmiles_utils.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,61 @@
from collections import defaultdict
import networkx as nx

def find_complementary_bonding_descriptor(bonding_descriptor, ellegible_descriptors=None):
    """
    Given a bonding descriptor find the complementary match.
    In the case of '$' prefixed descriptors this is just
    the same and '>' or '<' get flipped to the other
    symbol.

    Parameters
    ----------
    bonding_descriptor: str
    ellegible_descriptors: list[str]
        a list of allowed descriptors to match; if None the
        complementary descriptor is returned without checking
        it against any list

    Return
    ------
    list[str]
        all matching descriptors; for '$' prefixed descriptors
        with a non-empty list of ellegible descriptors these are
        all '$' prefixed entries sharing the same last character,
        otherwise a single element list with the complement

    Raises
    ------
    IOError
        if a list of ellegible descriptors is given but the
        complementary descriptor is not part of it
    """
    prefix = bonding_descriptor[0]

    # '$' descriptors may pair with any ellegible '$' descriptor that
    # ends in the same character, so there can be several matches
    if prefix == '$' and ellegible_descriptors:
        return [descriptor for descriptor in ellegible_descriptors
                if descriptor[0] == '$' and descriptor[-1] == bonding_descriptor[-1]]

    if prefix == '<':
        compl = '>' + bonding_descriptor[1:]
    elif prefix == '>':
        compl = '<' + bonding_descriptor[1:]
    else:
        compl = bonding_descriptor

    # only validate when the caller supplied a list; the default of None
    # cannot be used in a membership test and means "no restriction"
    if ellegible_descriptors is not None and compl not in ellegible_descriptors:
        msg = ("Bonding descriptor {compl} was not found in list of potential "
               "matching descriptors.")
        raise IOError(msg.format(compl=compl))

    return [compl]

def find_open_bonds(molecule, target_nodes=None):
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
"""
Collect all nodes which have an open bonding descriptor and store
them as keys with a list of nodes as values.
Collect all nodes which have an open bonding descriptor
and store them as keys with a list of nodes as values.

Parameters
----------
molecule: nx.Graph
target_nodes: list[abc.hashable]
a list of node keys matching molecule

Return
------
dict
"""
if target_nodes is None:
target_nodes = list(molecule.nodes)
target_nodes = molecule

open_bonds_by_descriptor = defaultdict(list)
open_bonds = nx.get_node_attributes(molecule, 'bonding')
Expand Down
1 change: 0 additions & 1 deletion cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ def compute_mass(input_molecule):
the atomic mass
"""
molecule = input_molecule.copy()
print(molecule.nodes(data=True))
# we need to add the hydrogen atoms
# for computing the mass
rebuild_h_atoms(molecule)
Expand Down
41 changes: 27 additions & 14 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
set_atom_names_atomistic)
from .pysmiles_utils import rebuild_h_atoms

def compatible(left, right):
def compatible(left, right, legacy=False):
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
"""
Check bonding descriptor compatibility according
to the BigSmiles syntax conventions.
Expand All @@ -23,14 +23,23 @@ def compatible(left, right):
-------
bool
"""
if left == right and left[0] not in '> <':
return True
l, r = left[0], right[0]
if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
return left[1:] == right[1:]
return False

def match_bonding_descriptors(source, target, bond_attribute="bonding"):
if legacy:
if left == right and left[0] not in '> <':
return True
l, r = left[0], right[0]
if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
return left[1:] == right[1:]
return False
else:
if left[0] == right[0] == '$' or left[0] == right[0] == '!':
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
return True

l, r = left[0], right[0]
if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
return True
return False

def match_bonding_descriptors(source, target, bond_attribute="bonding", legacy=False):
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
"""
Given a source and a target graph, which have bonding
descriptors stored as node attributes, find a pair of
Expand Down Expand Up @@ -65,7 +74,7 @@ def match_bonding_descriptors(source, target, bond_attribute="bonding"):
bond_targets = target_nodes[target_node]
for bond_source in bond_sources:
for bond_target in bond_targets:
if compatible(bond_source, bond_target):
if compatible(bond_source, bond_target, legacy=legacy):
return ((source_node, target_node), (bond_source, bond_target))
raise LookupError

Expand Down Expand Up @@ -141,7 +150,8 @@ class MoleculeResolver:
def __init__(self,
molecule_graph,
fragment_dicts,
last_all_atom=True):
last_all_atom=True,
legacy=False):
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

"""
Parameters
Expand All @@ -167,6 +177,7 @@ def __init__(self,
self.resolutions = len(self.fragment_dicts)
new_names = nx.get_node_attributes(self.molecule, "fragname")
nx.set_node_attributes(self.meta_graph, new_names, "atomname")
self.legacy = legacy

@staticmethod
def read_fragment_strings(fragment_strings, last_all_atom=True):
Expand Down Expand Up @@ -256,7 +267,8 @@ def edges_from_bonding_descrpt(self, all_atom=False):
node_graph = self.meta_graph.nodes[node]['graph']
try:
edge, bonding = match_bonding_descriptors(prev_graph,
node_graph)
node_graph,
legacy=self.legacy)
except LookupError:
continue
# remove used bonding descriptors
Expand Down Expand Up @@ -361,7 +373,7 @@ def resolve_all(self):
return meta_graph, graph

@classmethod
def from_string(cls, cgsmiles_str, last_all_atom=True):
def from_string(cls, cgsmiles_str, last_all_atom=True, legacy=False):
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
"""
Initiate a MoleculeResolver instance from a cgsmiles string.

Expand All @@ -384,7 +396,8 @@ def from_string(cls, cgsmiles_str, last_all_atom=True):
last_all_atom=last_all_atom)
resolver_obj = cls(molecule_graph=molecule,
fragment_dicts=fragment_dicts,
last_all_atom=last_all_atom)
last_all_atom=last_all_atom,
legacy=legacy)
return resolver_obj

@classmethod
Expand Down
Loading
Loading