Skip to content

Commit

Permalink
[project] add boilerplate for recommendation engine | [refactor] fol…
Browse files Browse the repository at this point in the history
…der structure
  • Loading branch information
N0-man committed Aug 17, 2024
1 parent f8dae03 commit 72c96ea
Show file tree
Hide file tree
Showing 166 changed files with 3,167 additions and 7 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
101 changes: 101 additions & 0 deletions Project_Recommendation_System/netflix/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Mixture model for collaborative filtering"""
from typing import NamedTuple, Tuple
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Circle, Arc


class GaussianMixture(NamedTuple):
    """Tuple holding the parameters of a gaussian mixture with K components.

    Each component stores a single scalar variance (``var`` has one entry
    per component) together with a mean vector and a mixing weight.
    """
    mu: np.ndarray  # (K, d) array - each row is the mean of one gaussian component
    var: np.ndarray  # (K, ) array - each entry is the variance of one component
    p: np.ndarray  # (K, ) array - each entry is the weight of one component


def init(X: np.ndarray, K: int,
         seed: int = 0) -> Tuple[GaussianMixture, np.ndarray]:
    """Build a starting mixture from K randomly chosen data points.

    NumPy's global random generator is seeded with ``seed``, then K distinct
    rows of X are drawn (without replacement) and used as the component
    means. Each component's variance is the mean squared difference between
    every entry of X and that component's mean. Weights and soft counts are
    uniform.

    Args:
        X: (n, d) array holding the data
        K: number of components
        seed: random seed

    Returns:
        mixture: the initialized gaussian mixture
        post: (n, K) array holding the soft counts
            for all components for all examples (every entry is 1 / K)
    """
    np.random.seed(seed)
    num_points, _ = X.shape

    # K distinct rows of X serve as the initial means
    chosen_rows = np.random.choice(num_points, K, replace=False)
    mu = X[chosen_rows]

    # One scalar variance per component, averaged over all points and dims
    var = np.array([((X - center) ** 2).mean() for center in mu], dtype=float)

    uniform_weights = np.ones(K) / K
    post = np.ones((num_points, K)) / K

    return GaussianMixture(mu, var, uniform_weights), post


def plot(X: np.ndarray, mixture: GaussianMixture, post: np.ndarray,
         title: str):
    """Plots the mixture model for 2D data.

    Every data point is drawn as a small ring (width and height 0.25) split
    into K coloured arcs; the angular extent of arc j is proportional to the
    point's soft count for component j after normalizing each row of
    ``post`` to sum to one. Every component is drawn as a circle centred on
    its mean with radius sqrt(variance) and labelled with its mean and
    standard deviation. The figure is displayed with ``plt.show()``.

    Args:
        X: (n, 2) array holding the data (only 2D data is supported, since
            the first two coordinates of each mean are used for the label)
        mixture: the gaussian mixture to draw
        post: (n, K) array holding the soft counts
            for all components for all examples
        title: title of the figure
    """
    _, K = post.shape

    percent = post / post.sum(axis=1).reshape(-1, 1)
    _, ax = plt.subplots()
    ax.title.set_text(title)
    ax.set_xlim((-20, 20))
    ax.set_ylim((-20, 20))
    r = 0.25
    color = ["r", "b", "k", "y", "m", "c"]
    n_colors = len(color)
    for i, point in enumerate(X):
        theta = 0
        for j in range(K):
            offset = percent[i, j] * 360
            # angle/theta1/theta2 are passed by keyword: newer Matplotlib
            # releases reject them as positional arguments.
            arc = Arc(point,
                      r,
                      r,
                      angle=0,
                      theta1=theta,
                      theta2=theta + offset,
                      # wrap around the palette so K > 6 does not raise
                      edgecolor=color[j % n_colors])
            ax.add_patch(arc)
            theta += offset
    for j in range(K):
        mu = mixture.mu[j]
        sigma = np.sqrt(mixture.var[j])
        circle = Circle(mu, sigma, color=color[j % n_colors], fill=False)
        ax.add_patch(circle)
        legend = "mu = ({:0.2f}, {:0.2f})\n stdv = {:0.2f}".format(
            mu[0], mu[1], sigma)
        ax.text(mu[0], mu[1], legend)
    plt.axis('equal')
    plt.show()


def rmse(X, Y):
    """Return the root-mean-square difference between X and Y.

    The squared differences are averaged over every element of ``X - Y``
    before the square root is taken.
    """
    residual = X - Y
    mean_squared = np.mean(residual * residual)
    return np.sqrt(mean_squared)

def bic(X: np.ndarray, mixture: GaussianMixture,
        log_likelihood: float) -> float:
    """Placeholder for the Bayesian Information Criterion of a mixture.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is described below.

    Args:
        X: (n, d) array holding the data
        mixture: a mixture of spherical gaussians
        log_likelihood: the log-likelihood of the data

    Returns:
        float: the BIC for this mixture

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError
71 changes: 71 additions & 0 deletions Project_Recommendation_System/netflix/em.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Mixture model for matrix completion"""
from typing import Tuple
import numpy as np
from scipy.special import logsumexp
from common import GaussianMixture


def estep(X: np.ndarray, mixture: GaussianMixture) -> Tuple[np.ndarray, float]:
    """Placeholder for the E-step on incomplete data.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is to softly assign each datapoint to a gaussian
    component.

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        mixture: the current gaussian mixture

    Returns:
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the assignment

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError



def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """Placeholder for the M-step on incomplete data.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is to update the gaussian mixture by maximizing the
    log-likelihood of the weighted dataset.

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError


def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Placeholder for the driver that runs the mixture model.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is described below.

    Args:
        X: (n, d) array holding the data
        mixture: a gaussian mixture (presumably the starting point of the
            iterations - confirm when implementing)
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the current assignment

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError


def fill_matrix(X: np.ndarray, mixture: GaussianMixture) -> np.ndarray:
    """Placeholder for completing an incomplete matrix with a mixture model.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is described below.

    Args:
        X: (n, d) array of incomplete data (incomplete entries = 0)
        mixture: a mixture of gaussians

    Returns:
        np.ndarray: a (n, d) array with completed data

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError
87 changes: 87 additions & 0 deletions Project_Recommendation_System/netflix/kmeans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Mixture model based on kmeans"""
from typing import Tuple
import numpy as np
from common import GaussianMixture


def estep(X: np.ndarray, mixture: GaussianMixture) -> np.ndarray:
    """E-step: hard-assign every datapoint to its nearest component mean.

    Distance is the squared Euclidean distance to each row of
    ``mixture.mu``; on a tie the component with the lowest index wins.

    Args:
        X: (n, d) array holding the data
        mixture: the current gaussian mixture

    Returns:
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples; each row has a single 1
            in the column of the closest mean and 0 elsewhere
    """
    n, _ = X.shape
    K, _ = mixture.mu.shape
    post = np.zeros((n, K))

    for row, point in enumerate(X):
        # broadcasting compares this point against all K means at once
        sq_dist = ((point - mixture.mu) ** 2).sum(axis=1)
        post[row, np.argmin(sq_dist)] = 1

    return post


def mstep(X: np.ndarray, post: np.ndarray) -> Tuple[GaussianMixture, float]:
    """M-step: re-estimate the mixture from the current assignment.

    For each component the mean is the post-weighted average of the data,
    the variance is the post-weighted sum of squared errors divided by
    (d * weighted count), and the weight is the weighted count divided by n.
    A component with a zero weighted count causes a division by zero, so
    its mean and variance come out as NaN.

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        float: the distortion cost for the current assignment (sum of the
            weighted squared errors over all components)
    """
    n, d = X.shape
    _, K = post.shape

    counts = post.sum(axis=0)
    mu = np.zeros((K, d))
    var = np.zeros(K)
    cost = 0

    for j in range(K):
        weights = post[:, j]
        mu[j, :] = weights @ X / counts[j]
        # weighted sum of squared distances to the freshly computed mean
        sse = ((mu[j] - X) ** 2).sum(axis=1) @ weights
        var[j] = sse / (d * counts[j])
        cost += sse

    return GaussianMixture(mu, var, counts / n), cost


def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Alternate E- and M-steps until the distortion cost stops improving.

    At least two E/M rounds are always performed; after that the loop stops
    as soon as a round lowers the cost by no more than 1e-4.

    Args:
        X: (n, d) array holding the data
        mixture: the gaussian mixture used for the first E-step
        post: (n, K) array holding the soft counts
            for all components for all examples (the incoming value is
            never read; it is recomputed by the first E-step)

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: distortion cost of the current assignment
    """
    last_cost = None
    while True:
        post = estep(X, mixture)
        mixture, cost = mstep(X, post)
        # the first round has nothing to compare against, so it never stops
        if last_cost is not None and not (last_cost - cost > 1e-4):
            break
        last_cost = cost

    return mixture, post, cost
9 changes: 9 additions & 0 deletions Project_Recommendation_System/netflix/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import numpy as np
import kmeans
import common
import naive_em
import em

# Load the toy dataset as a NumPy array; the relative path is resolved
# against the current working directory, so run this from the folder that
# contains toy_data.txt.
X = np.loadtxt("toy_data.txt")

# TODO: Your code here
53 changes: 53 additions & 0 deletions Project_Recommendation_System/netflix/naive_em.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Mixture model using EM"""
from typing import Tuple
import numpy as np
from common import GaussianMixture



def estep(X: np.ndarray, mixture: GaussianMixture) -> Tuple[np.ndarray, float]:
    """Placeholder for the E-step of the EM algorithm.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is to softly assign each datapoint to a gaussian
    component.

    Args:
        X: (n, d) array holding the data
        mixture: the current gaussian mixture

    Returns:
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the assignment

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError


def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """Placeholder for the M-step of the EM algorithm.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is to update the gaussian mixture by maximizing the
    log-likelihood of the weighted dataset.

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError


def run(X: np.ndarray, mixture: GaussianMixture,
        post: np.ndarray) -> Tuple[GaussianMixture, np.ndarray, float]:
    """Placeholder for the driver that runs the mixture model.

    Not implemented yet: every call raises NotImplementedError. The
    intended contract is described below.

    Args:
        X: (n, d) array holding the data
        mixture: a gaussian mixture (presumably the starting point of the
            iterations - confirm when implementing)
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        np.ndarray: (n, K) array holding the soft counts
            for all components for all examples
        float: log-likelihood of the current assignment

    Raises:
        NotImplementedError: always, until the function is implemented
    """
    raise NotImplementedError
Loading

0 comments on commit 72c96ea

Please sign in to comment.