Skip to content

Commit

Permalink
Merge pull request #1156 from AMS-Hippo/master
Browse files Browse the repository at this point in the history
Small default functions for landmark-UMAP
  • Loading branch information
lmcinnes authored Oct 25, 2024
2 parents f123b91 + e76da1b commit 51f75a3
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 64 deletions.
139 changes: 75 additions & 64 deletions notebooks/MNIST_Landmarks.ipynb

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions umap/parametric_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def __init__(
self.global_correlation_loss_weight = global_correlation_loss_weight
self.landmark_loss_fn = landmark_loss_fn
self.landmark_loss_weight = landmark_loss_weight
self.prev_epoch_X = None
self.window_vals = None

self.reconstruction_validation = (
reconstruction_validation # holdout data for reconstruction acc
Expand Down Expand Up @@ -174,6 +176,16 @@ def fit(self, X, y=None, precomputed_distances=None, landmark_positions=None):
The desired position in low-dimensional space of each sample in X.
Points that are not landmarks should have nan coordinates.
"""
if (self.prev_epoch_X is not None) & (landmark_positions is None):
# Add the landmark points for training, then make a landmark vector.
landmark_positions = np.stack(
[np.array([np.nan, np.nan])]*X.shape[0] + list(
self.transform(
self.prev_epoch_X
)
)
)
X = np.concatenate((X, self.prev_epoch_X))

if landmark_positions is not None:
len_X = len(X)
Expand Down Expand Up @@ -230,6 +242,16 @@ def fit_transform(
The desired position in low-dimensional space of each sample in X.
Points that are not landmarks should have nan coordinates.
"""
if (self.prev_epoch_X is not None) & (landmark_positions is None):
# Add the landmark points for training, then make a landmark vector.
landmark_positions = np.stack(
[np.array([np.nan, np.nan])]*X.shape[0] + list(
self.transform(
self.prev_epoch_X
)
)
)
X = np.concatenate((X, self.prev_epoch_X))

if landmark_positions is not None:
len_X = len(X)
Expand Down Expand Up @@ -473,6 +495,56 @@ def save(self, save_location, verbose=True):
if verbose:
print("Pickle of ParametricUMAP model saved to {}".format(model_output))

def add_landmarks(
    self,
    X,
    sample_pct=0.01,
    sample_mode="uniform",
    landmark_loss_weight=0.01,
    idx=None,
):
    """Add some points from a dataset X as "landmarks."

    The selected rows are stored on the model as ``prev_epoch_X`` and the
    indices used to select them as ``prev_epoch_idx``. ``sample_pct``,
    ``sample_mode`` and ``landmark_loss_weight`` are stored on the model
    as well.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Old data to be retained.
    sample_pct : float, optional
        Fraction of the rows of X to use as landmarks in "uniform" mode:
        ``int(n_samples * sample_pct)`` rows are drawn, so the default
        0.01 keeps 1% of the rows. Not used to select rows in
        "predetermined" mode.
    sample_mode : str, optional
        Method for sampling points. Allows "uniform" (rows drawn at
        random without replacement) and "predetermined" (rows given
        by ``idx``).
    landmark_loss_weight : float, optional
        Multiplier for landmark loss function.
    idx : index array or list, optional
        Indices into X of the rows to keep as landmarks. Required when
        ``sample_mode`` is "predetermined"; ignored in "uniform" mode.

    Raises
    ------
    ValueError
        If ``sample_mode`` is not one of the supported modes, or if it
        is "predetermined" and ``idx`` is None.
    """
    # Resolve the landmark indices first so that an invalid call raises
    # before any attribute on the model has been modified.
    if sample_mode == "uniform":
        n_samples = X.shape[0]
        landmark_idx = list(
            np.random.choice(
                n_samples, int(n_samples * sample_pct), replace=False
            )
        )
    elif sample_mode == "predetermined":
        if idx is None:
            raise ValueError(
                'sample_mode="predetermined" requires idx to be provided.'
            )
        landmark_idx = idx
    else:
        raise ValueError("Choice of sample_mode is not supported.")

    # Index X before touching the model: a bad idx fails here and leaves
    # the previously stored landmarks intact.
    landmark_X = X[landmark_idx]

    self.sample_pct = sample_pct
    self.sample_mode = sample_mode
    self.landmark_loss_weight = landmark_loss_weight
    self.prev_epoch_idx = landmark_idx
    self.prev_epoch_X = landmark_X

def remove_landmarks(self):
    """Drop the stored landmark samples.

    Resets ``prev_epoch_X`` to None. Only that attribute is changed;
    any other attribute on the model is left as it was.
    """
    self.prev_epoch_X = None

def to_ONNX(self, save_location):
"""Exports trained parametric UMAP as ONNX."""
# Extract encoder
Expand Down

0 comments on commit 51f75a3

Please sign in to comment.