-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[project] add boilerplate for recommendation engine | [refactor] fol…
…der structure
- Loading branch information
Showing
166 changed files
with
3,167 additions
and
7 deletions.
There are no files selected for viewing
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
"""Mixture model for collaborative filtering""" | ||
from typing import NamedTuple, Tuple | ||
import numpy as np | ||
from matplotlib import pyplot as plt | ||
from matplotlib.patches import Circle, Arc | ||
|
||
|
||
class GaussianMixture(NamedTuple):
    """Immutable record of the parameters of a K-component gaussian mixture"""
    # (K, d) array - row j is the mean of gaussian component j
    mu: np.ndarray
    # (K, ) array - entry j is the variance of component j
    var: np.ndarray
    # (K, ) array - entry j is the weight of component j
    p: np.ndarray
|
||
|
||
def init(X: np.ndarray, K: int,
         seed: int = 0) -> Tuple[GaussianMixture, np.ndarray]:
    """Builds a starting mixture whose means are K randomly drawn data
    points, with uniform weights and uniform assignments

    Args:
        X: (n, d) array holding the data
        K: number of components
        seed: random seed (applied to numpy's global random state)

    Returns:
        mixture: the initialized gaussian mixture
        post: (n, K) array holding the soft counts
            for all components for all examples
    """
    np.random.seed(seed)
    n_points, _ = X.shape
    weights = np.ones(K) / K

    # K distinct rows of X (sampled without replacement) serve as the means
    centers = X[np.random.choice(n_points, K, replace=False)]

    # Variance of a component: squared deviation from its mean, averaged
    # over every entry of X (all points and all coordinates)
    spreads = np.array([((X - center)**2).mean() for center in centers],
                       dtype=np.float64)

    # Every point starts out equally shared between the K components
    post = np.ones((n_points, K)) / K

    return GaussianMixture(centers, spreads, weights), post
|
||
|
||
def plot(X: np.ndarray, mixture: GaussianMixture, post: np.ndarray,
         title: str):
    """Plots the mixture model for 2D data

    Each data point is drawn as a small ring split into coloured arcs, one
    arc per component, whose angular size is the point's normalized soft
    count for that component. Each component is drawn as a circle centred
    on its mean with radius equal to its standard deviation, labelled with
    both values. The figure is then displayed with plt.show().

    Args:
        X: (n, 2) array holding the data
        mixture: the gaussian mixture to draw
        post: (n, K) array holding the soft counts
            for all components for all examples
        title: title of the figure
    """
    _, K = post.shape

    # Normalize each row so the arcs of one point add up to a full circle
    percent = post / post.sum(axis=1).reshape(-1, 1)
    _, ax = plt.subplots()
    ax.title.set_text(title)
    ax.set_xlim((-20, 20))
    ax.set_ylim((-20, 20))
    r = 0.25
    color = ["r", "b", "k", "y", "m", "c"]
    for i, point in enumerate(X):
        theta = 0
        for j in range(K):
            offset = percent[i, j] * 360
            # angle/theta1/theta2 are keyword-only in recent matplotlib
            # releases; passing them positionally raises TypeError there.
            # The palette is cycled so that K > len(color) does not raise
            # IndexError.
            arc = Arc(point,
                      r,
                      r,
                      angle=0,
                      theta1=theta,
                      theta2=theta + offset,
                      edgecolor=color[j % len(color)])
            ax.add_patch(arc)
            theta += offset
    for j in range(K):
        mu = mixture.mu[j]
        sigma = np.sqrt(mixture.var[j])
        circle = Circle(mu, sigma, color=color[j % len(color)], fill=False)
        ax.add_patch(circle)
        legend = "mu = ({:0.2f}, {:0.2f})\n stdv = {:0.2f}".format(
            mu[0], mu[1], sigma)
        ax.text(mu[0], mu[1], legend)
    plt.axis('equal')
    plt.show()
|
||
|
||
def rmse(X, Y):
    """Computes the root mean squared error between X and Y

    Args:
        X: array (or array-like, e.g. nested lists) of values
        Y: array (or array-like) broadcastable against X

    Returns:
        float: square root of the mean, over all entries, of (X - Y)**2
    """
    # asanyarray lets plain Python sequences through while leaving ndarray
    # subclasses (and ndarrays themselves) untouched
    diff = np.asanyarray(X) - np.asanyarray(Y)
    return np.sqrt(np.mean(diff**2))
|
||
def bic(X: np.ndarray, mixture: GaussianMixture,
        log_likelihood: float) -> float:
    """Placeholder for the Bayesian Information Criterion (BIC) of a
    mixture of gaussians; the computation is not implemented yet

    Args:
        X: (n, d) array holding the data
        mixture: a mixture of spherical gaussian
        log_likelihood: the log-likelihood of the data

    Returns:
        float: the BIC for this mixture (once implemented)

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
"""Mixture model for matrix completion""" | ||
from typing import Tuple | ||
import numpy as np | ||
from scipy.special import logsumexp | ||
from common import GaussianMixture | ||
|
||
|
||
def estep(X: np.ndarray, mixture: GaussianMixture) -> Tuple[np.ndarray, float]:
    """E-step placeholder for matrix completion: meant to softly assign
    each datapoint to a gaussian component; not implemented yet

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        mixture: the current gaussian mixture

    Returns:
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the assignment

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
|
||
|
||
|
||
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """M-step placeholder for matrix completion: meant to update the
    gaussian mixture by maximizing the log-likelihood of the weighted
    dataset; not implemented yet

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
|
||
|
||
def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Driver placeholder for the matrix-completion mixture model;
    not implemented yet

    Args:
        X: (n, d) array holding the data
        mixture: a gaussian mixture (presumably the starting point of the
            iterations - to be confirmed once implemented)
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the current assignment

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
|
||
|
||
def fill_matrix(X: np.ndarray, mixture: GaussianMixture) -> np.ndarray:
    """Placeholder for completing an incomplete matrix according to a
    mixture model; not implemented yet

    Args:
        X: (n, d) array of incomplete data (incomplete entries =0)
        mixture: a mixture of gaussians

    Returns:
        np.ndarray: a (n, d) array with completed data

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
"""Mixture model based on kmeans""" | ||
from typing import Tuple | ||
import numpy as np | ||
from common import GaussianMixture | ||
|
||
|
||
def estep(X: np.ndarray, mixture: GaussianMixture) -> np.ndarray:
    """E-step: hard-assigns every datapoint to the gaussian component whose
    mean is closest in squared euclidean distance

    Args:
        X: (n, d) array holding the data
        mixture: the current gaussian mixture

    Returns:
        np.ndarray: (n, K) array of counts; row i holds a single 1 in the
            column of the component chosen for example i, zeros elsewhere
    """
    n, _ = X.shape
    K, _ = mixture.mu.shape
    post = np.zeros((n, K))

    for row, x in enumerate(X):
        # Broadcasting x against the (K, d) means gives the K difference
        # vectors without building a tiled copy of x
        sq_dist = ((x - mixture.mu)**2).sum(axis=1)
        # argmin returns the first minimum, so ties go to the lowest index
        post[row, np.argmin(sq_dist)] = 1

    return post
|
||
|
||
def mstep(X: np.ndarray, post: np.ndarray) -> Tuple[GaussianMixture, float]:
    """M-step: re-estimates the gaussian mixture from the assignments.
    Each cluster yields a component mean, variance and weight.

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        float: the distortion cost for the current assignment
    """
    n, d = X.shape
    _, K = post.shape

    # Effective number of points per component
    counts = post.sum(axis=0)

    mu = np.zeros((K, d))
    var = np.zeros(K)
    cost = 0

    # NOTE: a component with no assigned points has a zero count, so its
    # mean, variance and the total cost come out as NaN.
    for j, count in enumerate(counts):
        member = post[:, j]
        # Weighted average of the points belonging to component j
        mu[j] = member @ X / count
        # Weighted sum of squared distances to the new mean
        sq_err = ((mu[j] - X)**2).sum(axis=1) @ member
        cost += sq_err
        # Spherical variance: squared error averaged per point and coordinate
        var[j] = sq_err / (d * count)

    return GaussianMixture(mu, var, counts / n), cost
|
||
|
||
def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Runs the mixture model, alternating E-steps and M-steps until the
    distortion cost stops improving by more than 1e-4

    Args:
        X: (n, d) array holding the data
        mixture: the gaussian mixture used for the first E-step
        post: (n, K) array holding the soft counts
            for all components for all examples (overwritten by the
            first E-step before being read)

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: distortion cost of the current assignment
    """
    # First pass: there is no earlier cost to compare with yet
    post = estep(X, mixture)
    mixture, cost = mstep(X, post)
    prev_cost = None

    # At least one more pass always runs; afterwards iterate only while
    # the cost keeps dropping by more than the tolerance
    while prev_cost is None or prev_cost - cost > 1e-4:
        prev_cost = cost
        post = estep(X, mixture)
        mixture, cost = mstep(X, post)

    return mixture, post, cost
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import numpy as np | ||
import kmeans | ||
import common | ||
import naive_em | ||
import em | ||
|
||
# Load the toy dataset from a text file in the current working directory
X = np.loadtxt(fname="toy_data.txt")

# TODO: Your code here
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
"""Mixture model using EM""" | ||
from typing import Tuple | ||
import numpy as np | ||
from common import GaussianMixture | ||
|
||
|
||
|
||
def estep(X: np.ndarray, mixture: GaussianMixture) -> Tuple[np.ndarray, float]:
    """E-step placeholder: meant to softly assign each datapoint to a
    gaussian component; not implemented yet

    Args:
        X: (n, d) array holding the data
        mixture: the current gaussian mixture

    Returns:
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the assignment

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
|
||
|
||
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step placeholder: meant to update the gaussian mixture by
    maximizing the log-likelihood of the weighted dataset; not
    implemented yet

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
|
||
|
||
def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Driver placeholder for the EM mixture model; not implemented yet

    Args:
        X: (n, d) array holding the data
        mixture: a gaussian mixture (presumably the starting point of the
            iterations - to be confirmed once implemented)
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the current assignment

    Raises:
        NotImplementedError: always, since the body is still a stub
    """
    raise NotImplementedError
Oops, something went wrong.