-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtfjs-stuff.js
102 lines (91 loc) · 3.52 KB
/
tfjs-stuff.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/**
* References:
*
* https://github.com/tensorflow/tfjs-models/tree/master/universal-sentence-encoder
* https://towardsdatascience.com/how-to-build-a-textual-similarity-analysis-web-app-aa3139d4fb71
* https://github.com/jinglescode/demos/tree/master/src/app/components/nlp-sentence-encoder
* https://towardsdatascience.com/how-to-measure-distances-in-machine-learning-13a396aa34ce
* https://en.wikipedia.org/wiki/Cosine_similarity
*
* */
// require("@tensorflow/tfjs-node");
// const use = require("@tensorflow-models/universal-sentence-encoder");
// <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
// <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/universal-sentence-encoder"></script>
// const sentence1 = "How's it going?";
// const sentence2 = "How are you?";
// useModel(sentence1, sentence2, output);
/**
 * Loads the Universal Sentence Encoder (U.S.E.) and compares two sentences.
 *
 * Relies on a `use` binding already being in scope (for example the global
 * created by the universal-sentence-encoder <script> tag); this function
 * does not import it itself.
 *
 * @param {string} sentence1 - First sentence to compare.
 * @param {string} sentence2 - Second sentence to compare.
 * @param {Function} [callback] - Forwarded unchanged to `embedSentences`.
 * @returns {Promise<*>} Settles once the model has loaded and
 *   `embedSentences` has been invoked; resolves with whatever
 *   `embedSentences` returns and rejects if loading (or that call) fails.
 */
function useModel(sentence1, sentence2, callback) {
  // Return the chain instead of leaving it floating, so callers can await
  // completion and attach their own rejection handler.
  return use
    .load()
    .then((model) => embedSentences(model, sentence1, sentence2, callback));
}
/**
 * Embeds two sentences with the given model and hands the two embeddings to
 * `getSimilarityPercent`.
 *
 * @param {Object} model - Object exposing `embed(sentences)` that returns a
 *   promise of a tensor-like value with `arraySync()` and `dispose()`
 *   (a loaded Universal Sentence Encoder model is the intended argument).
 * @param {string} sentence1 - First sentence.
 * @param {string} sentence2 - Second sentence.
 * @param {Function} [callback] - Forwarded to `getSimilarityPercent`.
 * @returns {Promise<*>} Resolves with the value returned by
 *   `getSimilarityPercent`; rejects if embedding fails.
 */
function embedSentences(model, sentence1, sentence2, callback) {
  // Return the promise so failures can be handled by the caller.
  return model.embed([sentence1, sentence2]).then((embeddings) => {
    let embeds;
    try {
      embeds = embeddings.arraySync();
    } finally {
      // The values have been copied into plain arrays (or the copy failed);
      // either way the tensor is no longer needed, so release it.
      embeddings.dispose();
    }
    const [sentence1Embedding, sentence2Embedding] = embeds;
    return getSimilarityPercent(
      sentence1Embedding,
      sentence2Embedding,
      callback
    );
  });
}
/**
 * Scores two embeddings with `cosineSimilarity`, optionally reports the score
 * through `callback`, and returns it.
 *
 * Despite the name, the value is the raw cosine similarity; it is not
 * multiplied by 100 here.
 * Background: https://en.wikipedia.org/wiki/Cosine_similarity
 *
 * @param {number[]} embed1 - First embedding vector.
 * @param {number[]} embed2 - Second embedding vector.
 * @param {Function} [callback] - Called with the score when truthy.
 * @returns {number} The cosine similarity of the two embeddings.
 */
function getSimilarityPercent(embed1, embed2, callback) {
  const score = cosineSimilarity(embed1, embed2);
  if (!callback) {
    return score;
  }
  callback(score);
  return score;
}
/**
 * Cosine similarity of two vectors: dot(a, b) / (|a| * |b|).
 *
 * References:
 * https://towardsdatascience.com/how-to-build-a-textual-similarity-analysis-web-app-aa3139d4fb71
 * https://towardsdatascience.com/how-to-measure-distances-in-machine-learning-13a396aa34ce
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} The similarity, or 0 when either magnitude is falsy
 *   (zero or NaN), which avoids dividing by it.
 */
function cosineSimilarity(a, b) {
  const normA = Math.sqrt(dotProduct(a, a));
  const normB = Math.sqrt(dotProduct(b, b));

  // Guard clause: no meaningful angle exists without two usable magnitudes.
  if (!normA || !normB) {
    return 0;
  }

  const numerator = dotProduct(a, b);
  return numerator / (normA * normB);
}
/**
 * Dot product of two numeric vectors.
 *
 * Iterates over the indices of `a`; `b` is read at the same indices, so a
 * shorter `b` contributes `undefined` entries and the result becomes NaN.
 *
 * @param {number[]} a - First vector; its length drives the iteration.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the pairwise products (0 for an empty `a`).
 */
function dotProduct(a, b) {
  let total = 0;
  let index = 0;
  while (index < a.length) {
    total += a[index] * b[index];
    index += 1;
  }
  return total;
}
// -------------------------------------------------
// True only when `require` exists and this file is `require.main`, i.e. the
// script was started directly rather than loaded by something else. Where
// `require` is not defined at all, the flag is simply false.
const runFromCli =
  typeof require === "undefined" ? false : require.main === module;

// Direct invocation: embed a small sample word list.
if (runFromCli) {
  useModelToEmbedAllSentences(["cat", "dog"]);
}
function useModelToEmbedAllSentences(sentences, callback) {
require("@tensorflow/tfjs-node");
const use = require("@tensorflow-models/universal-sentence-encoder");
const fs = require("fs");
// uses Universal Sentence Encoder (U.S.E.):
use.load().then((model) => {
embedAllSentences(model, sentences, fs);
});
}
/**
 * Embeds every sentence and, when `fs` is provided, appends the sentences to
 * "words.txt" and their embeddings to "embeddings.txt", one entry per line,
 * so that entry i of one file corresponds to entry i of the other.
 *
 * Each file receives a single append containing all entries. Issuing one
 * asynchronous append per entry without waiting gives no guarantee about the
 * order in which they land, which would break the index mapping.
 *
 * NOTE(review): as before, the first entry is written without a leading
 * newline, so appending to a non-empty file joins it onto the last existing
 * line. Confirm whether repeated runs against the same files are expected.
 *
 * @param {Object} model - Object exposing `embed(sentences)` that returns a
 *   promise of a tensor-like value with `arraySync()` and `dispose()`.
 * @param {string[]} sentences - Sentences to embed.
 * @param {Object} [fs] - Object exposing a callback-style
 *   `appendFile(path, data, cb)`; when falsy nothing is written or logged.
 * @returns {Promise<void>} Resolves after both files have been written (or
 *   immediately after embedding when `fs` is falsy); rejects if embedding or
 *   either write fails.
 */
function embedAllSentences(model, sentences, fs) {
  return model.embed(sentences).then((embeddings) => {
    let embeds;
    try {
      embeds = embeddings.arraySync();
    } finally {
      // The values now live in plain arrays; release the tensor.
      embeddings.dispose();
    }

    if (!fs) {
      return undefined;
    }

    // Adapts the callback-style append to a promise and, once the data is on
    // disk, emits the per-entry log lines in index order.
    const appendLines = (path, lines, describe) =>
      new Promise((resolve, reject) => {
        fs.appendFile(path, lines.join("\n"), (err) => {
          if (err) {
            reject(err);
            return;
          }
          lines.forEach((_, i) => console.log(describe(i)));
          resolve();
        });
      });

    const pendingWrites = [];
    // With no embeddings there is nothing to append; skip the writes so no
    // empty files are created.
    if (embeds.length > 0) {
      const wordLines = embeds.map((_, i) => String(sentences[i]));
      const embeddingLines = embeds.map((embed) => String(embed));
      pendingWrites.push(
        appendLines("words.txt", wordLines, (i) => `Added word ${i}!`),
        appendLines(
          "embeddings.txt",
          embeddingLines,
          (i) => `Added embedding ${i}!`
        )
      );
    }

    return Promise.all(pendingWrites).then(() => {
      console.log("Done adding all words and embeddings (mapped by index).");
    });
  });
}