-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_kmeans.mojo
executable file
·96 lines (77 loc) · 3.27 KB
/
run_kmeans.mojo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from mojo_kmeans import Matrix, Kmeans
from mojo_kmeans.utils import list_to_matrix
from time import now
from python import Python
def main():
    """Benchmark four k-means implementations on one synthetic dataset.

    Generates blobs with scikit-learn, fits the Mojo, pure-Python, SKLearn
    and ONNX Runtime k-means variants on the same samples, prints the
    wall-clock time of each fit, the speedup of Mojo relative to the others,
    the final inertia reported by each, and optionally plots the clusters.
    """
    # Make the current directory importable so the local `python_kmeans`
    # package can be loaded through the Python interop layer.
    Python.add_to_path(".")
    py_kmeans = Python.import_module("python_kmeans")
    py_utils = Python.import_module("python_kmeans.utils")
    np = Python.import_module("numpy")
    sklearn_datasets = Python.import_module("sklearn.datasets")
    sklearn_cluster = Python.import_module("sklearn.cluster")
    onnxruntime = Python.import_module("onnxruntime")

    # Benchmark configuration.
    n_clusters = 6
    n_samples = 3000
    n_features = 500
    plot_result = True
    verbose = True

    # With return_centers=True, make_blobs returns a 3-tuple:
    # X[0] = samples, X[1] = labels, X[2] = the true centers.
    X = sklearn_datasets.make_blobs(n_samples=n_samples,
                                    cluster_std=4,
                                    centers=n_clusters,
                                    n_features=n_features,
                                    return_centers=True,
                                    random_state=int(49))
    data = Matrix.from_numpy(X[0])

    # Iteration cap. Only the SKLearn and ONNX runs below receive this value;
    # the Mojo and Python models are constructed with `k` only.
    # NOTE(review): confirm their defaults match, or pass it explicitly, so
    # that all four runs are comparable.
    max_iterations = 100

    print("\n======== Mojo Kmeans ========")
    mojo_model = Kmeans(k=n_clusters)
    t = now()
    mojo_centroids = mojo_model.fit(data)
    # Elapsed time is scaled by 1e6 and reported as milliseconds
    # (presumably `now()` yields nanoseconds - see the Mojo `time` docs).
    t_mojo = Float64(now()-t)/1_000_000
    print('Mojo Kmeans complete (ms):',t_mojo)

    print("\n======== Python Kmeans ========")
    py_model = py_kmeans.Kmeans(k=n_clusters)
    t = now()
    py_centroids = py_model.fit(X[0])
    t_py = Float64(now()-t)/1_000_000
    print('Python Kmeans complete (ms):',t_py)

    print("\n======== SKLearn Kmeans ========")
    # Map the boolean flag onto the numeric verbosity level passed to KMeans.
    verbose_num = 1
    if not verbose:
        verbose_num = 0
    # NOTE(review): tol=0 here while the ONNX run below uses tol=1e-4 -
    # confirm the difference in stopping criteria is intended.
    sklearn_model = sklearn_cluster.KMeans(n_clusters=n_clusters,
                                           max_iter=max_iterations,
                                           verbose=verbose_num,
                                           tol=0)
    t = now()
    # The result is not used afterwards; the inertia is read from
    # `sklearn_model.inertia_` further down.
    sklearn_centroids = sklearn_model.fit(X[0])
    t_sklearn = Float64(now()-t)/1_000_000
    # Label fixed: this timing belongs to the SKLearn run, not the Python one.
    print('SKLearn Kmeans complete (ms):',t_sklearn)

    print("\n======== ONNX Kmeans ========")
    sess = onnxruntime.InferenceSession('./onnx_kmeans/kmeans.onnx')
    # Model inputs, keyed by the names used for the ONNX graph inputs.
    feed = Python.dict()
    feed['data'] = X[0]
    feed['n_clusters'] = np.array(n_clusters, np.int64)
    feed['max_iterations'] = np.array(max_iterations, np.int64)
    feed['tol'] = np.array(1e-4, np.float64)
    t = now()
    # Passing None as the first argument requests all model outputs.
    output_onnxruntime = sess.run(None, feed)
    t_onnx = Float64(now()-t)/1_000_000
    print('ONNX Kmeans complete (ms):',t_onnx)

    print()
    print("Config:")
    print("n_clusters =",n_clusters,"\nn_samples = ",n_samples,"\nn_features = ",n_features)
    print()
    # Ratios above 1 mean the Mojo fit took less time than the other one.
    print("Speedup Mojo vs. Python:",t_py/t_mojo)
    print("Speedup Mojo vs. SKLearn:",t_sklearn/t_mojo)
    print("Speedup Mojo vs. ONNX:",t_onnx/t_mojo)
    print()
    print("Comparing final inertia:")
    print("Mojo kmeans final inertia:", mojo_model.inertia)
    print("Python kmeans final inertia:", py_model.inertia)
    print("SKlearn kmeans final inertia:", sklearn_model.inertia_)
    # Output index 1 is reported as the inertia; index 0 is handed to the
    # plot below (presumably the centroids - verify against the ONNX model).
    print("ONNX kmeans final inertia:", output_onnxruntime[1])

    if plot_result:
        # Convert the Mojo fit result to a NumPy array for the Python plotting helper.
        mojo_centroids_matrix = list_to_matrix[data.dtype](mojo_centroids).to_numpy()
        py_utils.plot_clusters(X[0], X[1], mojo_centroids_matrix, py_centroids, output_onnxruntime[0], X[2])