Skip to content

Commit

Permalink
Merge pull request #11 from sergiomrebelo/modularise-nlp-utils
Browse files Browse the repository at this point in the history
Modularise nlp utils #9
  • Loading branch information
sergiomrebelo authored Jan 29, 2023
2 parents 832b5e9 + 0cb6bf6 commit 320933f
Show file tree
Hide file tree
Showing 34 changed files with 9,618 additions and 1,819 deletions.
30 changes: 18 additions & 12 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import express from 'express';
import dotenv from 'dotenv';

import cors from 'cors';
import {setup, classification, lexicon} from "./nlp-utils/nlp_utils.mjs";
import * as NLP from "./nlp-utils/nlp_utils.mjs";
import * as CLASSIFIER from "./nlp-utils/ml-emotion-analysis/ml-emotion-analysis.mjs";
import * as LEXICON from "./nlp-utils/lexicon-emotion-analysis/lexicon-emotion-analysis.mjs";
import sentenceTokeniser from "./nlp-utils/sentence-tokeniser/sentence-tokeniser.mjs";

const APP = express();
const PORT = process.env.PORT || "8000";
Expand All @@ -15,24 +16,26 @@ APP.use(express.urlencoded({extended: true}));
APP.use(express.static('src/public'));


APP.listen(PORT, () => {
APP.listen(PORT, async () => {
await CLASSIFIER.config(process.env.LANGUAGE_TRANSLATOR_IAM_APIKEY, process.env.LANGUAGE_TRANSLATOR_URL);
await LEXICON.config(process.env.MW_API_KEY, process.env.LANGUAGE_TRANSLATOR_IAM_APIKEY, process.env.LANGUAGE_TRANSLATOR_URL);
console.info(`👂at port ${PORT}`);
setup(process.env.MW_API_KEY, process.env.LANGUAGE_TRANSLATOR_IAM_APIKEY, process.env.LANGUAGE_TRANSLATOR_URL);
});

APP.get("/lines/:delimiter/:lang/:input/", async (req, res) => {
const delimiter = req.params.delimiter;
const text = req.params.input;
const sentences = text.split(delimiter);
const lang = req.params.lang;
const results = await analysis(text, lang, sentences);
const results = await analysis(sentences, lang);
res.status(results[0]).send(JSON.stringify(results[1]));
});

APP.get("/text/:lang/:input", async (req, res) => {
const text = req.params.input;
const lang = req.params.lang;
const results = await analysis(text, lang);
const sentences = (await _sentenceTokenizer(text)).flat();
const results = await analysis(sentences, lang);
res.status(results[0]).send(JSON.stringify(results[1]));
});

Expand All @@ -48,6 +51,11 @@ const errHandler = (code, msg) => {
}
}

const _sentenceTokenizer = async (text) => {
return sentenceTokeniser(text);
}

// TODO: NLP UTILS..
const _lexiconGlobalResults = async (sentences) => {

// compute global lexicon value
Expand All @@ -69,18 +77,16 @@ const _lexiconGlobalResults = async (sentences) => {
return res.length === 0 ? [['neutral', 1]] : res;
}

const analysis = async (text, lang, sentences = []) => {
const analysis = async (sentences = [], lang) => {
const text = sentences.flat().join(' ');
// classification analysis
const classificationResults = await classification(text, lang);
const classificationResults = await CLASSIFIER.classification(text, lang);
if (!classificationResults.success) return [400, errHandler(400, `Error in the classification method`)];

// sentence tokenizer (if necessary)
if (sentences.length === 0) sentences = (await NLP.sentenceTokenizer(text)).flat();

// lexicon-based analysis
let lexiconResults = { "global": null, "sentences": [] };
for (const sentence of sentences) {
const res = await lexicon(sentence, lang, false);
const res = await LEXICON.lexicon(sentence, lang, false);
lexiconResults.sentences.push(res);
if (!res.success) return [400, errHandler(400, `Error in the lexicon-based method (msg: ${res.msg})`)];
}
Expand Down
200 changes: 200 additions & 0 deletions src/nlp-utils/lexicon-emotion-analysis/History.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
/**
 * History tracks, for every word of an input text, the full chain of
 * transformations applied to it during NLP preprocessing.
 *
 * Each entry of `this.data` is a tuple-like array:
 *   [0] raw word (string, as it appeared in the input)
 *   [1] original position of the word in the text (number)
 *   [2] deleted flag (boolean) — true once the word has been removed
 *   [3] history (array of string[]) — successive forms of the word; the
 *       last element is the word's current form
 *
 * `this.active` caches the subset of entries not yet deleted.
 */
export default class History {
    constructor(txt) {
        // one entry per whitespace-separated word; history starts with the raw word
        this.data = txt.split(' ').map((w, i) => [w, i, false, [[w]]]);
        this.active = this.data;
    }

    /**
     * Records a replacement for every data entry whose raw form matches a
     * token of `previous`.
     * @param {string[]} previous - tokens (possibly multi-word, space-joined) to match by raw form
     * @param {string[]} [current=[]] - replacement; an empty array marks the word as deleted
     */
    updateMultiple = (previous, current = []) => {
        for (const tokens of previous) {
            for (const token of tokens.split(' ')) {
                for (const entry of this.data) {
                    if (entry[0] === token) {
                        // push a shallow copy so matching entries never share
                        // one mutable array instance
                        entry[3].push([...current]);
                        if (current.length === 0) {
                            entry[2] = true; // no replacement -> word removed
                        }
                    }
                }
            }
        }
    }

    /**
     * Records a replacement for the first entry whose *current* form equals
     * `previous` (compared against the first word of its latest history item).
     * @param {string} previous - current form of the word to update
     * @param {string} [current=""] - replacement text; "" marks the word as deleted
     * @returns {boolean} true when a matching entry was found and updated
     */
    update = (previous, current = "") => {
        // remove surrounding spaces if they exist
        const word = previous.trim();
        current = current.trim();
        for (const entry of this.data) {
            if (entry[3][entry[3].length - 1][0] === word) {
                entry[3].push(current.split(" "));
                if (current === "") {
                    entry[2] = true;
                }
                return true;
            }
        }
        return false;
    }

    /**
     * Marks every entry matching a token of `previous` as deleted.
     * @param {string[]} previous - raw forms to remove
     */
    remove = (previous) => {
        this.updateMultiple(previous, []);
    }

    /**
     * @returns {Array[]} the entries not flagged as deleted
     */
    getActive = () => {
        return this.data.filter(entry => entry[2] === false);
    }

    /**
     * Flags as deleted every entry whose latest form is empty.
     */
    clean = () => {
        for (const entry of this.data) {
            const i = entry[3].length - 1;
            const current = entry[3][i];
            // NOTE(review): `current` is always an array, so the two string
            // comparisons below can never match; kept as defensive guards.
            if (current.length === 0 || current === '' || current === ' ') {
                entry[2] = true;
            }
        }
    }

    /**
     * Replaces (or removes) the words sitting at the positions given by
     * `detected`, expressed as [sentenceIdx, tokenIdx] pairs.
     * @param {Array} detected - items whose [0]/[1] elements are sentence/token indices
     * @param {Object} processed - NLP result exposing `sentences[i].tokens`
     * @param {string[]} info - replacement word for each detected position
     * @param {boolean} [remove=false] - when true, delete instead of replace
     */
    updateActivePos = (detected = [], processed, info, remove = false) => {
        this.active = this.getActive();
        // flatten [sentence, token] pairs into absolute token positions
        const pos = this.pos(detected.map((v) => [v[0], v[1]]), processed);
        let counter = 0;
        let indexCounter = 0;
        for (const entry of this.active) {
            const current = entry[3][entry[3].length - 1];
            for (let j = 0; j < current.length; j++) {
                if (counter === pos[indexCounter]) {
                    if (remove === false) {
                        // replace one word inside a copy of the entry's latest
                        // form (this.data[entry[1]] is the same object as `entry`)
                        const n = [...entry[3][this.data[entry[1]][3].length - 1]];
                        n[j] = info[indexCounter];
                        entry[3].push(n);
                    } else {
                        // remove the word: empty form + deleted flag
                        entry[3].push(['']);
                        entry[2] = true;
                    }
                    indexCounter++;
                }
                counter++;
            }
        }
    }

    /**
     * Pushes a new form for every active entry, consuming `processed` words
     * in order (one per non-empty word of the entry's latest form).
     * @param {string[]} processed - replacement words, flattened across entries
     */
    updateActive = (processed) => {
        this.clean();
        this.active = this.getActive();
        let counter = 0;
        for (let j = 0; j < this.active.length; j++) {
            const words = this.active[j][3][this.active[j][3].length - 1];
            const update = [];
            for (const word of words) {
                if (word !== '') {
                    update.push(processed[counter]);
                    counter++;
                }
            }
            this.active[j][3].push(update);
        }
    }

    /**
     * Replaces each active word by its lemma, walking the per-sentence
     * lemma/token arrays in parallel with the active entries.
     * @param {string[][]} [pLemmas=[]] - lemmas per sentence
     * @param {string[][]} [pTokens=[]] - raw tokens per sentence (parallel to pLemmas)
     */
    updateLemmas = (pLemmas = [], pTokens = []) => {
        this.active = this.getActive();
        let counter = 0;
        const _raw = [];

        // flatten into [lemma, raw token, [sentenceIdx, tokenIdx]] triples
        // (positions kept for saving-in-history purposes)
        for (let i = 0; i < pLemmas.length; i++) {
            for (let j = 0; j < pTokens[i].length; j++) {
                _raw.push([pLemmas[i][j], pTokens[i][j], [i, j]]);
            }
        }

        let saved = [];

        for (let w = 0; w < _raw.length; w++) {
            const word = _raw[w];
            const entry = this.active[counter][3][this.active[counter][3].length - 1];
            if (entry.length === 1) {
                if (word[1] === entry[0]) {
                    // single-word entry matches the raw token: store its lemma
                    this.active[counter][3].push([word[0]]);
                    counter++;
                } else if ((this.active.length - 1) > (counter + 1)) {
                    // mismatch: peek ahead to resynchronise with the next entry
                    const next = counter + 1;
                    const nextEntry = this.active[next][3][this.active[next][3].length - 1];
                    if (nextEntry !== undefined && _raw[w + 1] !== undefined) {
                        if (_raw[w + 1][1] === nextEntry[0]) {
                            this.active[counter][3].push(saved);
                            saved = [];
                            counter++;
                        }
                    }
                }
            } else {
                // multi-word entry: accumulate lemmas until the form is complete
                saved.push(word[0]);
                if (saved.length === entry.length) {
                    this.active[counter][3].push(saved);
                    saved = [];
                    counter++;
                }
            }
        }
    }

    /**
     * Converts [sentenceIdx, tokenIdx] pairs into absolute token positions
     * across all sentences of `processed`.
     * @param {Array} [arr=[0,0]] - list of [sentenceIdx, tokenIdx] pairs
     * @param {Object} processed - NLP result exposing `sentences[i].tokens`
     * @returns {number[]} absolute positions, in input order
     */
    pos = (arr = [0, 0], processed) => {
        const out = [];
        for (const a of arr) {
            let current = 0;
            if (a[0] === 0) {
                current = a[1];
            } else if (a[0] > 0) {
                // add the token counts of all preceding sentences
                for (let i = 0; i < a[0]; i++) {
                    current += processed.sentences[i].tokens.length;
                }
                current += a[1];
            }
            out.push(current);
        }
        return out;
    }

    /**
     * Keeps only the words whose position in `map` is flagged true; entries
     * left with no words are marked as deleted.
     * @param {boolean[]} map - one keep/drop flag per active word, in order
     */
    removeBasedOnMap = (map) => {
        this.active = this.getActive();
        let counter = 0;
        for (const entry of this.active) {
            const current = entry[3][entry[3].length - 1];
            // slice of `map` belonging to this entry's words
            const toRemove = [];
            for (let i = 0; i < current.length; i++) {
                toRemove.push(map[counter]);
                counter++;
            }
            const kept = [];
            for (let i = 0; i < toRemove.length; i++) {
                if (toRemove[i] === true) {
                    kept.push(current[i]);
                }
            }
            entry[3].push(kept);
            if (kept.length === 0) {
                entry[2] = true;
            }
        }
    }

    /**
     * Dumps every entry to the console (debugging helper).
     */
    log = () => {
        for (const entry of this.data) {
            // fixed: original template had an unbalanced quote and a stray "}"
            console.log(`raw="${entry[0]}"`, `deleted=${entry[2]}`, entry[3], `(length:${entry[3].length})`);
        }
    }

    /**
     * @returns {Array[]} the full entry list, including deleted entries
     */
    get = () => {
        return this.data;
    }
}

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions src/nlp-utils/lexicon-emotion-analysis/data/emoticons.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
":)": "smile",
": )": "smile",
":-)": "smile",
"(:": "smile",
"( :": "smile",
"(-:": "smile",
":’)": "smile",
":d": "laugh",
": d": "laugh",
":-d": "laugh",
"xd": "laugh",
"x-d": "laugh",
"xd": "laugh",
"x-d": "laugh",
"<3": "love",
":*": "love",
";-)": "affection",
";)": "affection",
";-d": "affection",
";d": "affection",
"(;": "affection",
"(-;": "affection",
":-(": "sad",
": (": "sad",
":(": "sad",
"):": "sad",
")-": "sad",
":’(": "cry"
}
Loading

0 comments on commit 320933f

Please sign in to comment.