# Python - NLP Word2Vec
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as colors
import matplotlib.cm as cmx
import matplotlib as mpl
# Toy 2-D "word vectors". Each row is one arrow stored as (x1, y1, x2, y2):
# the arrow starts at (x1, y1) and its tip is at (x2, y2).
DATA = np.array([
    [0, 0, 0.18, 0.5],
    [0, 0, 0.18, 0.6],
    [0, 0, 0.2, 0.58],
    [0, 0, 0.41, 0.5],
    [0, 0, 0.73, 0.2],
])

# names[i] is the word drawn at the tip of the arrow in DATA[i].
names = np.array(["Woman", "Man", "Boy", "Cat", "Car"])


def euclidean_similarity(u, v):
    """Return ``1 - ||u - v||`` for two equal-length vectors.

    Identical vectors score 1.0; the score drops (and can go negative)
    as the Euclidean distance between them grows.
    """
    diff = np.asarray(u, dtype=float) - np.asarray(v, dtype=float)
    # np.linalg.norm replaces matplotlib.mlab.dist, which no longer exists
    # in current matplotlib releases.
    return float(1.0 - np.linalg.norm(diff))


def cosine_similarity(u, v):
    """Return the cosine of the angle between vectors ``u`` and ``v``.

    Computed as ``dot(u, v) / (||u|| * ||v||)``, so the result lies in
    [-1, 1] and is 1.0 for vectors pointing in the same direction.

    Raises:
        ValueError: if either vector has zero length (the angle is
            undefined in that case).
    """
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    if denom == 0.0:
        raise ValueError("cosine similarity is undefined for a zero-length vector")
    return float(np.dot(u, v) / denom)


def plot_word_vectors(data, labels, cmap=None):
    """Draw one labelled arrow per row of ``data`` and show the figure.

    Args:
        data: array of rows ``(x1, y1, x2, y2)`` describing each arrow.
        labels: one text label per row, drawn at the arrow tip.
        cmap: matplotlib colormap used to color arrows by their tip's
            y value; defaults to ``plt.cm.jet``.
    """
    if cmap is None:
        cmap = plt.cm.jet

    # Arrows are colored by the y coordinate of their tip. The lower bound
    # of the color scale is raised by 0.2 above the smallest tip y value.
    tip_y = data[:, 3]
    color_norm = colors.Normalize(vmin=np.min(tip_y) + .2, vmax=np.max(tip_y))
    scalar_map = cmx.ScalarMappable(norm=color_norm, cmap=cmap)

    # A single figure only: an extra plt.subplots() call here would open a
    # second, empty window.
    plt.figure(figsize=(9, 6))

    for x1, y1, x2, y2 in data:
        plt.arrow(
            x1,
            y1,
            x2 - x1,  # dx
            y2 - y1,  # dy
            color=scalar_map.to_rgba(y2),
            head_width=0.012,
            head_length=0.02,
        )

    for label, row in zip(labels, data):
        plt.annotate(label, (row[2], row[3]))

    plt.annotate("HUMAN CLUSTER", (0.1, 0.7), color='r')
    plt.annotate("ANIMAL CLUSTER", (0.35, 0.6), color='b')
    plt.annotate("OBJECT CLUSTER", (0.7, 0.3))
    plt.title("WORD2VEC")
    plt.show()


def main():
    """Plot the toy word vectors, then print each word's similarity to "Man"."""
    plot_word_vectors(DATA, names)

    # Look the reference word up instead of hard-coding its row index.
    man = DATA[names.tolist().index("Man")]

    print("KNN Similarity with Man")
    for label, row in zip(names, DATA):
        print(label, ":", euclidean_similarity(row, man))
    print("")

    # Cosine similarity is taken between the arrow tips (columns 2 and 3).
    for label, row in zip(names, DATA):
        print("Cosine Similarity with Man:", label,
              cosine_similarity(row[2:4], man[2:4]))


# Guarded so the helpers above can be imported without opening a plot window.
if __name__ == "__main__":
    main()