-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathword-cloud.js
49 lines (39 loc) · 1.63 KB
/
word-cloud.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
'use strict';
const TwitterKoreanProcessor = require('node-twitter-korean-text');
const fs = require('fs');
const _ = require('lodash');
// Raw input text, loaded synchronously at startup. The relative path is
// resolved against the process's current working directory.
const text = fs.readFileSync('./results.txt', 'utf8');
// Upper bound on the number of entries kept in the generated nouns.json.
const LIMIT_WORDS_NUMBER = 13;
// "Human-intelligence TF-IDF": a hand-curated stop list used in place of real
// term weighting. Built once as a Set so every lookup is O(1) instead of
// re-creating and scanning an array on each call.
const EXCLUDED_WORDS = new Set([
  '것', '알', '위', '감', '앞', '내', '날', '데', '로', '약', '쪽', '줄', '입', '중',
  '곳', '해', '치', '파', '더', '게', '전', '반', '니', '뿐', '늘', '함', '거나', '온',
  '로서', '도', '의', '분', '땐', '해도', '대해', '로써', '때', '수', '그', '및', '에서',
  '은', '남',
]);

/**
 * Tells whether a word may appear in the word-cloud data.
 *
 * @param {string} word - Candidate word.
 * @returns {boolean} `false` when the word is on the stop list, `true` otherwise.
 */
const isAllowKey = (word) => !EXCLUDED_WORDS.has(word);
// Pipeline: normalize the raw text, tokenize it, convert the tokens to plain
// objects, count the nouns and write the most frequent ones to
// src/data/nouns.json. The promises are chained flat (each step returns the
// next promise) so a failure in any step reaches the single .catch() below.
TwitterKoreanProcessor.normalize(text)
  .then((normalized) => TwitterKoreanProcessor.tokenize(normalized))
  .then((tokens) => TwitterKoreanProcessor.tokensToJsonArray(tokens, true))
  .then((results) => {
    // Keep only the surface text of tokens tagged as nouns.
    const nouns = _.filter(results, (token) => token.koreanPos === 'Noun')
      .map((token) => token.text);

    // { word: occurrences }
    const nounsCount = _.countBy(nouns);

    // Drop stop words, then rank by frequency so the slice below keeps the
    // most frequent words rather than simply the first ones encountered.
    // Where the engine's sort is stable, ties keep their first-seen order.
    const cloudData = Object.keys(nounsCount)
      .filter((word) => isAllowKey(word))
      .map((word) => ({ text: word, weight: nounsCount[word] }))
      .sort((a, b) => b.weight - a.weight);

    const topWords = cloudData.slice(0, LIMIT_WORDS_NUMBER);

    const dir = `${__dirname}/src/data`;
    if (!fs.existsSync(dir)) {
      // recursive: also create a missing parent directory (src/).
      fs.mkdirSync(dir, { recursive: true });
    }

    // Adapt the callback API to a promise so a write error is routed to the
    // .catch() below instead of being thrown from inside the callback.
    return new Promise((resolve, reject) => {
      fs.writeFile(`${dir}/nouns.json`, JSON.stringify(topWords), (err) => {
        if (err) {
          reject(err);
          return;
        }
        resolve();
      });
    });
  })
  .then(() => {
    console.log('Nouns File write completed');
  })
  .catch((err) => {
    // Report the failure and signal it through the exit code.
    console.error(err);
    process.exitCode = 1;
  });