diff --git a/umap/parametric_umap.py b/umap/parametric_umap.py index a2d766b3..c9756fd5 100644 --- a/umap/parametric_umap.py +++ b/umap/parametric_umap.py @@ -376,7 +376,11 @@ def _fit_embed_data(self, X, n_epochs, init, random_state): def __getstate__(self): # this function supports pickling, making sure that objects can be pickled - return dict((k, v) for (k, v) in self.__dict__.items() if should_pickle(k, v) and k != "optimizer") + return dict( + (k, v) + for (k, v) in self.__dict__.items() + if should_pickle(k, v) and k != "optimizer" + ) def save(self, save_location, verbose=True): @@ -907,11 +911,10 @@ def load_ParametricUMAP(save_location, verbose=True): print("Pickle of ParametricUMAP model loaded from {}".format(model_output)) # Work around optimizer not pickling anymore (since tf 2.4) - class_name = model._optimizer_dict['name'] + class_name = model._optimizer_dict["name"] OptimizerClass = getattr(tf.keras.optimizers, class_name) model.optimizer = OptimizerClass.from_config(model._optimizer_dict) - # load encoder encoder_output = os.path.join(save_location, "encoder") if os.path.exists(encoder_output): diff --git a/umap/tests/test_umap_ops.py b/umap/tests/test_umap_ops.py index 964ab75b..96e7cbc5 100644 --- a/umap/tests/test_umap_ops.py +++ b/umap/tests/test_umap_ops.py @@ -101,6 +101,7 @@ def test_multi_component_layout_precomputed(): assert_less(error, 15.0, msg="Multi component embedding to far astray") + @pytest.mark.parametrize("num_isolates", [1, 5]) @pytest.mark.parametrize("metric", ["jaccard", "hellinger", "cosine"]) @pytest.mark.parametrize("force_approximation", [True, False]) @@ -137,7 +138,8 @@ def test_disconnected_data(num_isolates, metric, force_approximation): isolated_vertices = disconnected_vertices(model) assert isolated_vertices[10] == True number_of_nan = np.sum(np.isnan(model.embedding_[isolated_vertices])) - assert number_of_nan >= num_isolates*model.n_components + assert number_of_nan >= num_isolates * model.n_components + @pytest.mark.parametrize("num_isolates", [1]) @pytest.mark.parametrize("sparse", [True, False]) @@ -166,6 +168,7 @@ def test_disconnected_data_precomputed(num_isolates, sparse): number_of_nan = np.sum(np.isnan(model.embedding_[isolated_vertices])) assert number_of_nan >= num_isolates * model.n_components + # --------------- # Umap Transform # -------------- diff --git a/umap/umap_.py b/umap/umap_.py index 45407e7e..2b02de13 100644 --- a/umap/umap_.py +++ b/umap/umap_.py @@ -415,7 +415,7 @@ def compute_membership_strengths( continue # We didn't get the full knn for i # If applied to an adjacency matrix points shouldn't be similar to themselves. # If applied to an incidence matrix (or bipartite) then the row and column indices are different. - if (bipartite==False) & (knn_indices[i, j] == i): + if (bipartite == False) & (knn_indices[i, j] == i): val = 0.0 elif knn_dists[i, j] - rhos[i] <= 0.0 or sigmas[i] == 0.0: val = 1.0 @@ -1258,6 +1258,7 @@ def init_transform(indices, weights, embedding): return result + def init_graph_transform(graph, embedding): """Given a bipartite graph representing the 1-simplices and strengths between the new points and the original data set along with an embedding of the original points @@ -1295,10 +1296,15 @@ def init_graph_transform(graph, embedding): result[row_index, :] = embedding[col_index, :] break for d in range(embedding.shape[1]): - result[row_index, d] += graph[row_index, col_index] / num_neighbours * embedding[col_index, d] + result[row_index, d] += ( + graph[row_index, col_index] + / num_neighbours + * embedding[col_index, d] + ) return result + @numba.njit() def init_update(current_init, n_original_samples, indices): for i in range(n_original_samples, indices.shape[0]): @@ -2737,10 +2743,10 @@ def transform(self, X): # This was a very specially constructed graph with constant degree. # That lets us do fancy unpacking by reshaping the csr matrix indices # and data. Doing so relies on the constant degree assumption! - #csr_graph = normalize(graph.tocsr(), norm="l1") - #inds = csr_graph.indices.reshape(X.shape[0], self._n_neighbors) - #weights = csr_graph.data.reshape(X.shape[0], self._n_neighbors) - #embedding = init_transform(inds, weights, self.embedding_) + # csr_graph = normalize(graph.tocsr(), norm="l1") + # inds = csr_graph.indices.reshape(X.shape[0], self._n_neighbors) + # weights = csr_graph.data.reshape(X.shape[0], self._n_neighbors) + # embedding = init_transform(inds, weights, self.embedding_) # This is less fast code than the above numba.jit'd code. # It handles the fact that our nearest neighbour graph can now contain variable numbers of vertices. csr_graph = graph.tocsr()