From 8446cd3f3fe2dc43398bac506061e2500247474a Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 18 Oct 2023 16:27:24 +0100 Subject: [PATCH] fix dali filtering --- prody/database/dali.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/prody/database/dali.py b/prody/database/dali.py index 522887665..5a4925e0e 100644 --- a/prody/database/dali.py +++ b/prody/database/dali.py @@ -402,9 +402,9 @@ def filter(self, cutoff_len=None, cutoff_rmsd=None, cutoff_Z=None, cutoff_identi cutoff_len = 0 elif not isinstance(cutoff_len, (float, int)): raise TypeError('cutoff_len must be a float or an integer') - elif cutoff_len <= 1 and cutoff_len > 0: + elif cutoff_len <= 1 and cutoff_len >= 0: cutoff_len = int(cutoff_len*self._max_index) - elif cutoff_len <= self._max_index and cutoff_len > 0: + elif cutoff_len <= self._max_index and cutoff_len > 1: cutoff_len = int(cutoff_len) else: raise ValueError('cutoff_len must be a float between 0 and 1, or an int not greater than the max length') @@ -455,13 +455,13 @@ def filter(self, cutoff_len=None, cutoff_rmsd=None, cutoff_Z=None, cutoff_identi pdbListAll = self._pdbListAll missing_ind_dict = dict() ref_indices_set = set(range(self._max_index)) + query = self._pdbId+self._chain filterListLen = [] filterListRMSD = [] filterListZ = [] filterListIdentity = [] - # keep the first PDB (query PDB) - for pdb_chain in pdbListAll[1:]: + for pdb_chain in pdbListAll: temp_dict = daliInfo[pdb_chain] # filter: len_align, identity, rmsd, Z if temp_dict['len_align'] < cutoff_len: @@ -503,7 +503,10 @@ def filter(self, cutoff_len=None, cutoff_rmsd=None, cutoff_Z=None, cutoff_identi filterDict = {'len': filterListLen, 'rmsd': filterListRMSD, 'Z': filterListZ, 'identity': filterListIdentity} self._filterList = filterList self._filterDict = filterDict - self._pdbList = [self._pdbListAll[0]] + [item for item in self._pdbListAll[1:] if not item in filterList] + if query in self._pdbListAll: + self._pdbList = [query] + [item for item in self._pdbListAll if item not in filterList] + else: + self._pdbList = [item for item in self._pdbListAll if item not in filterList] LOGGER.info(str(len(filterList)) + ' PDBs have been filtered out from '+str(len(pdbListAll))+' Dali hits (remaining: '+str(len(pdbListAll)-len(filterList))+').') return self._pdbList