Commit
[proj2] setup mnist code and notebook
Showing 125 changed files with 1,805 additions and 1 deletion.
Binary file added: BIN +2.59 MB, Project 2 Digit Recognition/mnist/Datasets/test_multi_digit_mini.pkl.gz
Binary file added: BIN +26.1 MB, Project 2 Digit Recognition/mnist/Datasets/train_multi_digit_mini.pkl.gz
44 changes: 44 additions & 0 deletions
Project 2 Digit Recognition/mnist/part1/cubic_features_checker.py
@@ -0,0 +1,44 @@
import sys
sys.path.append("..")
import utils
from utils import *
import numpy as np
from features import cubic_features


def verify_cubic_features1D():
    X = np.array([[np.sqrt(3)], [0]])
    X_cube = np.sort(cubic_features(X))
    X_correct = np.array([[1., np.sqrt(9), np.sqrt(27), np.sqrt(27)], [0., 0., 0., 1.]])

    if np.all(np.absolute(X_cube - X_correct) < 1.0e-6):
        print("Verifying cubic features of 1 dimension: Passed")
    else:
        print("Verifying cubic features of 1 dimension: Failed")


def verify_cubic_features2D():
    X = np.array([[np.sqrt(3), np.sqrt(3)], [0, 0]])
    X_cube = np.sort(cubic_features(X))
    X_correct = np.array([[1., 3., 3., 5.19615242, 5.19615242, 5.19615242, 5.19615242, 7.34846923, 9., 9.],
                          [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

    if np.all(np.absolute(X_cube - X_correct) < 1.0e-6):
        print("Verifying cubic features of 2 dimensions: Passed")
    else:
        print("Verifying cubic features of 2 dimensions: Failed")


def verify_cubic_features2D2():
    X = np.array([[np.sqrt(3), 0], [0, np.sqrt(3)]])
    X_cube = np.sort(cubic_features(X))
    X_correct = np.array([[0., 0., 0., 0., 0., 0., 1., 3., 5.19615242, 5.19615242],
                          [0., 0., 0., 0., 0., 0., 1., 3., 5.19615242, 5.19615242]])

    if np.all(np.absolute(X_cube - X_correct) < 1.0e-6):
        print("Verifying cubic features of 2 dimensions asymmetric vectors: Passed")
    else:
        print("Verifying cubic features of 2 dimensions asymmetric vectors: Failed")


verify_cubic_features1D()
verify_cubic_features2D()
verify_cubic_features2D2()
@@ -0,0 +1,157 @@
import numpy as np
import matplotlib.pyplot as plt


def project_onto_PC(X, pcs, n_components, feature_means):
    """
    Given principal component vectors pcs = principal_components(X),
    this function returns a new data array in which each sample in X
    has been projected onto the first n_components principal components.
    """
    # TODO: first center the data using feature_means.
    # TODO: Return the projection of the centered dataset
    #       on the first n_components principal components.
    #       This should be an array with dimensions: n x n_components.
    # Hint: these principal components = first n_components columns
    #       of the eigenvectors returned by principal_components().
    #       Note that each eigenvector is already a unit vector,
    #       so the projection may be done using matrix multiplication.
    raise NotImplementedError

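For reference, a minimal sketch of what the projection described in the TODOs above could look like, assuming pcs is the d x d eigenvector matrix returned by principal_components() below (one possible solution, not the released one; the name project_onto_PC_sketch is illustrative):

# Sketch only: center with feature_means, then project via matrix multiplication.
def project_onto_PC_sketch(X, pcs, n_components, feature_means):
    X_centered = X - feature_means              # center each feature at mean 0
    return X_centered @ pcs[:, :n_components]   # (n, d) @ (d, n_components)
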
### Functions which are already complete, for you to use ###


def cubic_features(X):
    """
    Returns a new dataset with features given by the mapping
    which corresponds to the cubic kernel.
    """
    n, d = X.shape  # dataset size, input dimension
    X_withones = np.ones((n, d + 1))
    X_withones[:, :-1] = X
    new_d = int((d + 1) * (d + 2) * (d + 3) / 6)  # dimension of output

    new_data = np.zeros((n, new_d))
    col_index = 0
    for x_i in range(n):
        X_i = X[x_i]
        X_i = X_i.reshape(1, X_i.size)

        if d > 2:
            # Cross terms x_a * x_b * x_c with three distinct indices,
            # scaled by sqrt(6).
            comb_2 = np.matmul(np.transpose(X_i), X_i)

            unique_2 = comb_2[np.triu_indices(d, 1)]
            unique_2 = unique_2.reshape(unique_2.size, 1)
            comb_3 = np.matmul(unique_2, X_i)
            keep_m = np.zeros(comb_3.shape)
            index = 0
            for i in range(d - 1):
                keep_m[index + np.arange(d - 1 - i), i] = 0

                tri_keep = np.triu_indices(d - 1 - i, 1)

                correct_0 = tri_keep[0] + index
                correct_1 = tri_keep[1] + i + 1

                keep_m[correct_0, correct_1] = 1
                index += d - 1 - i

            unique_3 = np.sqrt(6) * comb_3[np.nonzero(keep_m)]

            new_data[x_i, np.arange(unique_3.size)] = unique_3
            col_index = unique_3.size

    # Remaining monomials, built from X with a constant 1 appended:
    # cubes, plus pairwise terms scaled by sqrt(3) or sqrt(6).
    for i in range(n):
        newdata_colindex = col_index
        for j in range(d + 1):
            new_data[i, newdata_colindex] = X_withones[i, j]**3
            newdata_colindex += 1
            for k in range(j + 1, d + 1):
                new_data[i, newdata_colindex] = X_withones[i, j]**2 * X_withones[i, k] * (3**(0.5))
                newdata_colindex += 1

                new_data[i, newdata_colindex] = X_withones[i, j] * X_withones[i, k]**2 * (3**(0.5))
                newdata_colindex += 1

                if k < d:
                    new_data[i, newdata_colindex] = X_withones[i, j] * X_withones[i, k] * (6**(0.5))
                    newdata_colindex += 1

    return new_data

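Because cubic_features implements the feature map of the cubic kernel, a quick sanity check (a sketch under that assumption, not part of the release; X_check and Y_check are illustrative names) is that inner products of mapped points match the kernel value (<x, y> + 1)^3:

# Sketch only: phi(x) . phi(y) should equal (<x, y> + 1)^3.
X_check = np.random.randn(3, 4)
Y_check = np.random.randn(2, 4)
assert np.allclose(cubic_features(X_check) @ cubic_features(Y_check).T,
                   (X_check @ Y_check.T + 1)**3)
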
def center_data(X):
    """
    Returns a centered version of the data, where each feature now has mean = 0
    Args:
        X - n x d NumPy array of n data points, each with d features
    Returns:
        - (n, d) NumPy array X' where for each i = 1, ..., n and j = 1, ..., d:
          X'[i][j] = X[i][j] - means[j]
        - (d, ) NumPy array with the column means
    """
    feature_means = X.mean(axis=0)
    return (X - feature_means), feature_means

def principal_components(centered_data):
    """
    Returns the principal component vectors of the data, sorted in decreasing order
    of eigenvalue magnitude. This function first calculates the covariance matrix
    and then finds its eigenvectors.
    Args:
        centered_data - n x d NumPy array of n data points, each with d features
    Returns:
        d x d NumPy array whose columns are the principal component directions,
        sorted in descending order by the amount of variation each direction
        explains (these are equivalent to the d eigenvectors of the covariance
        matrix sorted in descending order of eigenvalue, so the first column
        corresponds to the eigenvector with the largest eigenvalue).
    """
    scatter_matrix = np.dot(centered_data.transpose(), centered_data)
    eigen_values, eigen_vectors = np.linalg.eig(scatter_matrix)
    # Re-order eigenvectors by eigenvalue magnitude:
    idx = eigen_values.argsort()[::-1]
    eigen_values = eigen_values[idx]
    eigen_vectors = eigen_vectors[:, idx]
    return eigen_vectors

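Taken together, center_data and principal_components form the standard PCA pipeline. A short usage sketch (variable names here are illustrative, not from the release):

# Sketch only: center, compute principal components, project onto the first two.
X_demo = np.random.randn(100, 20)
centered, feature_means = center_data(X_demo)
pcs = principal_components(centered)
pc_data = centered @ pcs[:, :2]  # what project_onto_PC should return for n_components=2
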
### Correction note: differing from the release, this function takes an extra input feature_means.

def plot_PC(X, pcs, labels, feature_means):
    """
    Given the principal component vectors as the columns of matrix pcs,
    this function projects each sample in X onto the first two principal components
    and produces a scatterplot where points are marked with the digit depicted in
    the corresponding image.
    labels = a numpy array containing the digits corresponding to each image in X.
    """
    pc_data = project_onto_PC(X, pcs, n_components=2, feature_means=feature_means)
    text_labels = [str(z) for z in labels.tolist()]
    fig, ax = plt.subplots()
    ax.scatter(pc_data[:, 0], pc_data[:, 1], alpha=0, marker=".")
    for i, txt in enumerate(text_labels):
        ax.annotate(txt, (pc_data[i, 0], pc_data[i, 1]))
    ax.set_xlabel('PC 1')
    ax.set_ylabel('PC 2')
    plt.show()

### Correction note: differing from the release, this function takes an extra input feature_means.

def reconstruct_PC(x_pca, pcs, n_components, X, feature_means):
    """
    Given the principal component vectors as the columns of matrix pcs,
    this function reconstructs a single image from its principal component
    representation, x_pca.
    X = the original data to which PCA was applied to get pcs.
    """
    x_reconstructed = np.dot(x_pca, pcs[:, range(n_components)].T) + feature_means
    return x_reconstructed
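
A brief usage sketch, continuing the illustrative variables from the PCA pipeline sketch above: reconstruct_PC maps a low-dimensional representation back to the original feature space.

# Sketch only: reconstruct the first sample from its 2-component representation.
x_rec = reconstruct_PC(pc_data[0], pcs, n_components=2, X=X_demo, feature_means=feature_means)
# x_rec approximates X_demo[0]; the error shrinks as n_components grows.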
@@ -0,0 +1,42 @@
import numpy as np

### Functions for you to fill in ###


def polynomial_kernel(X, Y, c, p):
    """
    Compute the polynomial kernel between two matrices X and Y:
        K(x, y) = (<x, y> + c)^p
    for each pair of rows x in X and y in Y.
    Args:
        X - (n, d) NumPy array (n datapoints each with d features)
        Y - (m, d) NumPy array (m datapoints each with d features)
        c - a coefficient to trade off high-order and low-order terms (scalar)
        p - the degree of the polynomial kernel
    Returns:
        kernel_matrix - (n, m) NumPy array containing the kernel matrix
    """
    # YOUR CODE HERE
    raise NotImplementedError
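
The docstring fully specifies the computation; a minimal vectorized sketch (one possible solution, not the released one; polynomial_kernel_sketch is an illustrative name):

# Sketch only: K[i, j] = (<X[i], Y[j]> + c)^p for all pairs at once.
def polynomial_kernel_sketch(X, Y, c, p):
    return (X @ Y.T + c) ** p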


def rbf_kernel(X, Y, gamma):
    """
    Compute the Gaussian RBF kernel between two matrices X and Y:
        K(x, y) = exp(-gamma ||x - y||^2)
    for each pair of rows x in X and y in Y.
    Args:
        X - (n, d) NumPy array (n datapoints each with d features)
        Y - (m, d) NumPy array (m datapoints each with d features)
        gamma - the gamma parameter of gaussian function (scalar)
    Returns:
        kernel_matrix - (n, m) NumPy array containing the kernel matrix
    """
    # YOUR CODE HERE
    raise NotImplementedError
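
A minimal sketch of one possible implementation (not the released one; rbf_kernel_sketch is an illustrative name), using the expansion ||x - y||^2 = ||x||^2 - 2<x, y> + ||y||^2 to stay vectorized:

# Sketch only: pairwise squared distances via broadcasting, then exponentiate.
def rbf_kernel_sketch(X, Y, gamma):
    sq_dists = ((X**2).sum(axis=1)[:, None]     # ||x||^2, shape (n, 1)
                - 2 * X @ Y.T                   # -2 <x, y>, shape (n, m)
                + (Y**2).sum(axis=1)[None, :])  # ||y||^2, shape (1, m)
    return np.exp(-gamma * sq_dists)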
27 changes: 27 additions & 0 deletions
Project 2 Digit Recognition/mnist/part1/linear_regression.py
@@ -0,0 +1,27 @@
import numpy as np

### Functions for you to fill in ###

def closed_form(X, Y, lambda_factor):
    """
    Computes the closed form solution of linear regression with L2 regularization
    Args:
        X - (n, d + 1) NumPy array (n datapoints each with d features plus the bias feature in the first dimension)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        lambda_factor - the regularization constant (scalar)
    Returns:
        theta - (d + 1, ) NumPy array containing the weights of linear regression. Note that theta[0]
            represents the y-axis intercept of the model and therefore X[:, 0] = 1
    """
    # YOUR CODE HERE
    raise NotImplementedError
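
The closed-form ridge solution is theta = (X^T X + lambda * I)^(-1) X^T Y; a minimal sketch (one possible solution, not the released one; closed_form_sketch is an illustrative name):

# Sketch only: solve (X^T X + lambda*I) theta = X^T Y rather than inverting.
def closed_form_sketch(X, Y, lambda_factor):
    A = X.T @ X + lambda_factor * np.eye(X.shape[1])
    return np.linalg.solve(A, X.T @ Y)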

### Functions which are already complete, for you to use ###

def compute_test_error_linear(test_x, Y, theta):
    test_y_predict = np.round(np.dot(test_x, theta))
    test_y_predict[test_y_predict < 0] = 0  # clip below the valid label range
    test_y_predict[test_y_predict > 9] = 9  # clip above the valid label range
    return 1 - np.mean(test_y_predict == Y)