Skip to content

Commit

Permalink
start integrating anisous everywhere
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmkrieger committed Nov 3, 2023
1 parent 7dba36d commit 68479a0
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 51 deletions.
107 changes: 104 additions & 3 deletions prody/atomic/atomgroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

from prody import LOGGER, PY2K
from prody.kdtree import KDTree
from prody.utilities import checkCoords, rangeString, getDistance, copy
from prody.utilities import (checkCoords, checkAnisous,
rangeString, getDistance, copy)

from .atomic import Atomic
from .fields import ATOMIC_FIELDS, READONLY
Expand Down Expand Up @@ -130,7 +131,7 @@ class AtomGroup(Atomic):
'_donors', '_acceptors', '_nbexclusions', '_crossterms',
'_cslabels', '_acsi', '_n_csets', '_data',
'_fragments', '_flags', '_flagsts', '_subsets',
'_msa', '_sequenceMap']
'_msa', '_sequenceMap', '_anisous']

def __init__(self, title='Unnamed'):

Expand Down Expand Up @@ -170,6 +171,7 @@ def __init__(self, title='Unnamed'):
self._subsets = None
self._msa = None
self._sequenceMap = None
self._anisous = None

def __repr__(self):

Expand Down Expand Up @@ -238,12 +240,15 @@ def __add__(self, other):
if self._n_csets:
if self._n_csets == other._n_csets:
new.setCoords(np.concatenate((self._coords, other._coords), 1))
new.setAnisous(np.concatenate((self._anisous, other._anisous), 1))
if self._n_csets > 1:
LOGGER.info('All {0} coordinate sets are copied to '
'{1}.'.format(self._n_csets, new.getTitle()))
else:
new.setCoords(np.concatenate((self._getCoords(),
other._getCoords())))
new.setAnisous(np.concatenate((self._getCoords(),
other._getCoords())))
LOGGER.info('Active coordinate sets are copied to {0}.'
.format(new.getTitle()))
elif other._n_csets:
Expand Down Expand Up @@ -565,7 +570,101 @@ def _setCoords(self, coords, label='', overwrite=False):
self._setTimeStamp(acsi)
self._cslabels[acsi] = str(label)

def addCoordset(self, coords, label=None):
def getAnisous(self):
"""Returns a copy of anisous from active coordinate set."""

if self._anisous is not None:
return self._anisous[self._acsi].copy()

def _getAnisous(self):
"""Returns a view of anisous from active coordinate set."""

if self._anisous is not None:
return self._anisous[self._acsi]

def setAnisous(self, anisous, label=''):
"""Set anisous of atoms. *anisous* may be any array like object
or an object instance with :meth:`getAnisous` method. If the shape of
anisou array is ``(n_csets > 1, n_atoms, 3)``, it will replace all
coordinate sets and the active coordinate set index will reset to
zero. This situation can be avoided using :meth:`addCoordset`.
If shape of *coords* is ``(n_atoms, 3)`` or ``(1, n_atoms, 3)``, it
will replace the active coordinate set. *label* argument may be used
to label coordinate set(s). *label* may be a string or a list of
strings length equal to the number of coordinate sets."""

atoms = anisous
try:
if self._anisous is None and hasattr(atoms, '_getAnisous'):
anisous = atoms._getAnisous()
else:
anisous = atoms.getAnisous()
except AttributeError:
if self._anisous is None:
anisous = np.array(anisous)
else:
if anisous is None:
raise ValueError('anisous of {0} are not set'
.format(str(atoms)))

try:
checkAnisous(anisous, csets=True, dtype=(float, np.float32))
except TypeError:
raise TypeError('coords must be a numpy array or an '
'object with `getCoords` method')

self._setAnisous(anisous, label=label)

def _setAnisous(self, anisous, label='', overwrite=False):
"""Set anisous without data type checking. *coords* must
be a :class:`~numpy.ndarray`, but may have data type other than
:class:`~numpy.float64`, e.g. :class:`~numpy.float32`. *label*
argument may be used to label coordinate sets. *label* may be
a string or a list of strings length equal to the number of
coordinate sets."""

n_atoms = self._n_atoms
if n_atoms:
if anisous.shape[-2] != n_atoms:
raise ValueError('anisous array has incorrect number of atoms')
else:
self._n_atoms = n_atoms = anisous.shape[-2]

ndim = anisous.ndim
shape = anisous.shape
if self._anisous is None or overwrite or (ndim == 6 and shape[0] > 1):
if ndim == 2:
self._anisous = anisous.reshape((1, n_atoms, 6))
self._cslabels = [str(label)]
self._n_csets = n_csets = 1

else:
self._anisous = anisous
self._n_csets = n_csets = shape[0]

if isinstance(label, list):
if len(label) == n_csets:
self._cslabels = list(label)

else:
self._cslabels = [''] * n_csets
LOGGER.warn('Number of labels does not match number '
'of coordinate sets.')
else:
self._cslabels = [str(label)] * n_csets
self._acsi = 0
self._setTimeStamp()

else:
acsi = self._acsi
if ndim == 2:
self._anisous[acsi] = anisous
else:
self._anisous[acsi] = anisous[0]
self._setTimeStamp(acsi)
self._cslabels[acsi] = str(label)

def addCoordset(self, coords, label=None, anisous=None):
"""Add a coordinate set. *coords* argument may be an object with
:meth:`getCoordsets` method."""

Expand Down Expand Up @@ -596,6 +695,8 @@ def addCoordset(self, coords, label=None):

diff = coords.shape[0]
self._coords = np.concatenate((self._coords, coords), axis=0)
if anisous is not None and self._anisous is not None:
self._anisous = np.concatenate((self._anisous, anisous), axis=0)
self._n_csets = self._coords.shape[0]
timestamps = self._timestamps
self._timestamps = np.zeros(self._n_csets)
Expand Down
104 changes: 57 additions & 47 deletions prody/proteins/pdbfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,9 +840,12 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
if END:
coordinates.resize((acount, 3), refcheck=False)
if addcoords:
atomgroup.addCoordset(coordinates)
atomgroup.addCoordset(coordinates, anisous=anisou)
else:
atomgroup._setCoords(coordinates)
if isPDB and anisou is not None:
anisou.resize((acount, 6), refcheck=False)
atomgroup._setAnisous(anisou / 10000)
else:
coordsets = np.zeros((int(diff//acount+1), acount, 3))
coordsets[0] = coordinates[:acount]
Expand Down Expand Up @@ -880,9 +883,6 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
atomgroup.setElements(np.char.strip(elements))
from prody.utilities.misctools import getMasses
atomgroup.setMasses(getMasses(np.char.strip(elements)))
if anisou is not None:
anisou.resize((acount, 6), refcheck=False)
atomgroup.setAnisous(anisou / 10000)
if siguij is not None:
siguij.resize((acount, 6), refcheck=False)
atomgroup.setAnistds(siguij / 10000)
Expand All @@ -908,11 +908,11 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
anisou = np.zeros((alength, 6),
dtype=ATOMIC_FIELDS['anisou'].dtype)

# alt = line[16]
# if alt not in which_altlocs and which_altlocs != 'all':
# altloc[alt].append((line, i))
# i += 1
# continue
alt = line[16]
if alt not in which_altlocs and which_altlocs != 'all':
altloc[alt].append((line, i))
i += 1
continue
try:
index = acount - 1
anisou[index, 0] = line[28:35]
Expand Down Expand Up @@ -1026,55 +1026,65 @@ def _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames):
indices = {}
for key in altloc_keys:
xyz = atomgroup.getCoords()
anisou = atomgroup.getAnisous()
success = 0
lines = altloc[key]
for line, i in lines:
aan = line[12:16].strip()
arn = line[17:21].strip()
ach = line[21]
ari = int(line[22:26].split()[0])
rn, ids, ans = indices.get((ach, ari), (None, None, None))
if ids is None:
ids = indices.get(ach, None)
if line.startswith('ATOM '):
aan = line[12:16].strip()
arn = line[17:21].strip()
ach = line[21]
ari = int(line[22:26].split()[0])
rn, ids, ans = indices.get((ach, ari), (None, None, None))
if ids is None:
ids = (chainids == ach).nonzero()[0]
indices[ach] = ids
ids = ids[resnums[ids] == ari]
if len(ids) == 0:
ids = indices.get(ach, None)
if ids is None:
ids = (chainids == ach).nonzero()[0]
indices[ach] = ids
ids = ids[resnums[ids] == ari]
if len(ids) == 0:
LOGGER.warn("failed to parse altloc {0} at line {1}, "
"residue not present for altloc 'A'".format(
repr(key), i+1))
continue
rn = resnames[ids[0]]
ans = atomnames[ids]
indices[(ach, ari)] = (rn, ids, ans)
if rn != arn:
LOGGER.warn("failed to parse altloc {0} at line {1}, "
"residue not present for altloc 'A'".format(
repr(key), i+1))
"residue name mismatch (expected {2}, "
"parsed {3})".format(repr(key), i+1, repr(rn),
repr(arn)))
continue
rn = resnames[ids[0]]
ans = atomnames[ids]
indices[(ach, ari)] = (rn, ids, ans)
if rn != arn:
LOGGER.warn("failed to parse altloc {0} at line {1}, "
"residue name mismatch (expected {2}, "
"parsed {3})".format(repr(key), i+1, repr(rn),
repr(arn)))
continue
index = ids[(ans == aan).nonzero()[0]]
if len(index) != 1:
LOGGER.warn("failed to parse altloc {0} at line {1}, atom"
" {2} not found in the residue"
.format(repr(key), i+1, repr(aan)))
continue
try:
xyz[index[0], 0] = float(line[30:38])
xyz[index[0], 1] = float(line[38:46])
xyz[index[0], 2] = float(line[46:54])
except:
LOGGER.warn('failed to parse altloc {0} at line {1}, could'
' not read coordinates'.format(repr(key), i+1))
continue
success += 1
index = ids[(ans == aan).nonzero()[0]]
if len(index) != 1:
LOGGER.warn("failed to parse altloc {0} at line {1}, atom"
" {2} not found in the residue"
.format(repr(key), i+1, repr(aan)))
continue
try:
xyz[index[0], 0] = float(line[30:38])
xyz[index[0], 1] = float(line[38:46])
xyz[index[0], 2] = float(line[46:54])
except:
LOGGER.warn('failed to parse altloc {0} at line {1}, could'
' not read coordinates'.format(repr(key), i+1))
continue
success += 1
elif line.startswith('ANISOU'):
try:
anisou[index[0], 0] = float(line[30:38])
anisou[index[0], 1] = float(line[38:46])
anisou[index[0], 2] = float(line[46:54])
except:
LOGGER.warn('failed to parse altloc {0} at line {1}, could'
' not read coordinates'.format(repr(key), i+1))
LOGGER.info('{0} out of {1} altloc {2} lines were parsed.'
.format(success, len(lines), repr(key)))
if success > 0:
LOGGER.info('Altloc {0} is appended as a coordinate set to '
'atomgroup {1}.'.format(repr(key), atomgroup.getTitle()))
atomgroup.addCoordset(xyz, label='altloc ' + key)
atomgroup.addCoordset(xyz, label='altloc ' + key, anisous=anisou)


#HELIXLINE = ('HELIX %3d %3s %-3s %1s %4d%1s %-3s %1s %4d%1s%2d'
Expand Down
56 changes: 55 additions & 1 deletion prody/utilities/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from numpy import any, float32, tile

__all__ = ['checkCoords', 'checkWeights', 'checkTypes']
__all__ = ['checkCoords', 'checkWeights', 'checkTypes', 'checkAnisous']

COORDS_NDIM = set([2])
CSETS_NDIMS = set([2, 3])
Expand Down Expand Up @@ -135,3 +135,57 @@ def incr(n, i):
.format(repr(arg), tstr, repr(type(val).__name__)))

return True

A_CSETS_NDIMS = set([2, 6])

def checkAnisous(anisous, csets=False, natoms=None, dtype=(float, float32),
name='anisous'):
"""Returns **True** if shape, dimensionality, and data type of *anisous*
array are as expected.
:arg anisous: anisous array
:arg csets: whether multiple coordinate sets (i.e. ``.ndim in (2, 3)``) are
allowed, default is **False**
:arg natoms: number of atoms, if **None** number of atoms is not checked
:arg dtype: allowed data type(s), default is ``(float, numpy.float32)``,
if **None** data type is not checked
:arg name: name of the coordinate argument
:raises: :exc:`TypeError` when *anisous* is not an instance of
:class:`numpy.ndarray`
:raises: :exc:`ValueError` when wrong shape, dimensionality, or data type
is encountered"""

try:
ndim, shape = anisous.ndim, anisous.shape
except AttributeError:
raise TypeError('anisous must be a numpy.ndarray instance')

ndims = A_CSETS_NDIMS if csets else COORDS_NDIM
if ndim not in ndims:
raise ValueError(str(name) + '.ndim must be ' +
' or '.join([str(d) for d in ndims]))

elif shape[-1] != 6:
raise ValueError(str(name) + '.shape[-1] must be 6')

elif natoms and shape[-2] != natoms:
raise ValueError(str(name) + '.shape[-2] must match number of atoms')

elif dtype:
if isinstance(dtype, type) and anisous.dtype != dtype:
raise ValueError(str(name) + '.dtype must be ' + dtype.__name__)
elif anisous.dtype not in dtype:
if len(dtype) > 1:
msg = ', '.join([repr(dt.__name__) for dt in dtype[:-1]]
) + ', or ' + repr(dtype[-1].__name__)
else:
msg = dtype[0].__name__
raise ValueError(str(name) + '.dtype must be ' + msg)

return True

0 comments on commit 68479a0

Please sign in to comment.