From 4ce04181be4ba18b15ffb5772e7ea20e64e0e6d0 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 31 Jan 2024 18:39:16 +0100 Subject: [PATCH 1/3] add kmedoid clustering --- prody/utilities/catchall.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/prody/utilities/catchall.py b/prody/utilities/catchall.py index cc0a94e6b..6948c91ea 100644 --- a/prody/utilities/catchall.py +++ b/prody/utilities/catchall.py @@ -11,7 +11,8 @@ __all__ = ['calcTree', 'clusterMatrix', 'showLines', 'showMatrix', 'reorderMatrix', 'findSubgroups', 'getCoords', 'getLinkage', 'getTreeFromLinkage', 'clusterSubfamilies', - 'calcRMSDclusters', 'calcGromosClusters', 'calcGromacsClusters'] + 'calcRMSDclusters', 'calcGromosClusters', 'calcGromacsClusters', + 'calcKmedoidClusters'] class LinkageError(Exception): pass @@ -1048,3 +1049,14 @@ def calcRMSDclusters(rmsd_matrix, c, labels=None): calcGromosClusters = calcRMSDclusters calcGromacsClusters = calcRMSDclusters + +def calcKmedoidClusters(distances, nClusters): + try: + import kmedoids + except ImportError: + raise ImportError('Please install kmedoids to run this function') + + c = kmedoids.fasterpam(distances, nClusters) + labels = c.labels + _, counts = np.unique(labels, return_counts=True) + return c.medoids, labels, counts From 4e7b595f51f8899aba0f1cb38d0fa05141a22c23 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Fri, 2 Feb 2024 18:16:23 +0100 Subject: [PATCH 2/3] swap kmedoids to sklearn_extra --- prody/utilities/catchall.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/prody/utilities/catchall.py b/prody/utilities/catchall.py index 6948c91ea..a7bda5629 100644 --- a/prody/utilities/catchall.py +++ b/prody/utilities/catchall.py @@ -1050,13 +1050,14 @@ def calcRMSDclusters(rmsd_matrix, c, labels=None): calcGromosClusters = calcRMSDclusters calcGromacsClusters = calcRMSDclusters -def calcKmedoidClusters(distances, nClusters): +def calcKmedoidClusters(coordsets, nClusters): try: - import kmedoids + from sklearn_extra.cluster import KMedoids except ImportError: - raise ImportError('Please install kmedoids to run this function') + raise ImportError('Please install sklearn_extra to run this function') - c = kmedoids.fasterpam(distances, nClusters) - labels = c.labels + X = coordsets.reshape(coordsets.shape[0], -1) + c = KMedoids(n_clusters=nClusters, random_state=0).fit(X) + labels = c.labels_ _, counts = np.unique(labels, return_counts=True) - return c.medoids, labels, counts + return c.medoid_indices_, labels, counts From 73902a2125c8f6275e0debeb3ec0618f532ac5eb Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 7 Feb 2024 19:25:27 +0100 Subject: [PATCH 3/3] fix error for scikit-learn-extra --- prody/utilities/catchall.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/utilities/catchall.py b/prody/utilities/catchall.py index a7bda5629..09ca28d1a 100644 --- a/prody/utilities/catchall.py +++ b/prody/utilities/catchall.py @@ -1054,7 +1054,7 @@ def calcKmedoidClusters(coordsets, nClusters): try: from sklearn_extra.cluster import KMedoids except ImportError: - raise ImportError('Please install sklearn_extra to run this function') + raise ImportError('Please install scikit-learn-extra to run this function') X = coordsets.reshape(coordsets.shape[0], -1) c = KMedoids(n_clusters=nClusters, random_state=0).fit(X)