Feature/221 nlp sqs #222

Merged · 5 commits · Nov 12, 2019
38 changes: 23 additions & 15 deletions NLP/src/app.py
@@ -3,13 +3,22 @@
 # flask REST API code
 #===========================
 from flask import Flask
-from flask_cors import CORS, cross_origin
+from flask_cors import CORS
 from flask import request
 from flask import json
 from flask import Response
-import csv
+import sentry_sdk
+from sentry_sdk.integrations.flask import FlaskIntegration

-from utils import keyword_extractor
+from body import *
+from getTagList import *
+from nounExtractor import *
+from preprocessing import *
+
+sentry_sdk.init(
+    dsn="https://[email protected]/1810010",
+    integrations=[FlaskIntegration()]
+)

 app = Flask(__name__)
 cors = CORS(app)
@@ -23,24 +32,23 @@ def get():
     '''
     arrayList =
     {
-        title : ""r
+        title : ""
         url : ""
         memo : ["", "", ...]
         highlight : ["", "", ...]
     }
     '''
     result = {}
-    result["title"] = data["title"]
-    result["url"] = data["url"]
-    #result["keywords"], result["result"] = keyword_extractor(data["title"], "")
+    title = data["title"]
+    url = data["url"]
+
+    title_nouns, body_nouns = nouns_extractor(title, url, nlpapi_num=3)
+    TF = TF_score(title_nouns, body_nouns)
+    tags = Total_score(TF, seletAllTagsFromDB(), alpha=1, beta=1, test=False)

-    res = json.dumps(result, ensure_ascii=False).encode('utf8')
+    result["version"] = "1.0"
+    result["tags"] = tags

-    with open('./data.csv', 'a', encoding='utf-8') as csvfile:
-        fieldnames = ['title', 'url', 'keywords', 'result']
-        wr = csv.DictWriter(csvfile, fieldnames=fieldnames)
-        wr.writeheader()
-        #wr.writerow(result)
+    res = json.dumps(result, ensure_ascii=False).encode('utf8')
     print(res)

     return Response(res, content_type='application/json; charset=utf-8')
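Note: for reference, a minimal standalone sketch of the tagging pipeline the new get() handler runs. The star imports mirror app.py; the sample title and url are placeholder inputs, and the helper signatures are taken from this diff as-is.

    import json

    from body import *
    from getTagList import *
    from nounExtractor import *
    from preprocessing import *

    title = "예시 기사 제목"                # placeholder input
    url = "https://example.com/article"     # placeholder input

    # Same call sequence as the new handler: extract nouns, score term
    # frequency, then rank against the tag list from the DB.
    title_nouns, body_nouns = nouns_extractor(title, url, nlpapi_num=3)
    TF = TF_score(title_nouns, body_nouns)
    tags = Total_score(TF, seletAllTagsFromDB(), alpha=1, beta=1, test=False)

    result = {"version": "1.0", "tags": tags}
    print(json.dumps(result, ensure_ascii=False))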
1 change: 1 addition & 0 deletions NLP/src/body.py
@@ -15,6 +15,7 @@ def spider(url):
         return ''.join(data)
     except Exception as ex:
         print("::CRAWLING ERROR::\n",ex)
+        return ""

 '''
 urls = []
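Note: the added return matters because spider() previously fell through to an implicit None on a crawl failure, which breaks string consumers downstream; returning "" keeps the contract. A two-line sketch (the URL is a hypothetical dead host):

    text = spider("https://unreachable.invalid")   # "" on crawl error instead of None
    print(len(text))                               # 0 — still safe to treat as a string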
9 changes: 3 additions & 6 deletions NLP/src/getTagList.py
@@ -8,12 +8,9 @@
 def TF_score(title, body):

     words = {}
-    try:
-        for w, c in Counter(body).most_common(1000):
-            if notNoStopWords(w):
-                words[w] = c
-    except Exception as e:
-        print("::body nouns ERROR::\n", e)
+    for w, c in Counter(body).most_common(1000):
+        if notNoStopWords(w):
+            words[w] = c

     for w, c in Counter(title).most_common(10):
         if notNoStopWords(w):
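Note: with the try/except gone, TF_score assumes body is already an iterable of tokens. A standalone sketch of the counting pattern, with a hypothetical stop-word check standing in for notNoStopWords():

    from collections import Counter

    STOPWORDS = {"the", "a", "of"}                # placeholder stop-word list

    def not_stopword(w):
        return w not in STOPWORDS

    body = ["aws", "sqs", "queue", "sqs", "the", "queue", "sqs"]
    words = {}
    for w, c in Counter(body).most_common(1000):  # top terms by raw count
        if not_stopword(w):
            words[w] = c
    print(words)                                  # {'sqs': 3, 'queue': 2, 'aws': 1}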
8 changes: 2 additions & 6 deletions NLP/src/nounExtractor.py
@@ -16,7 +16,7 @@ def getEngNouns(sen):

 def nlp_apis(api_num, sen):
     sen = clean_sentence(sen)
-    api_dict = {1:Kkma(), 2:Twitter(), 3:Hannanum(), 4:Komoran(), 5:Mecab('/usr/local/lib/mecab/dic/mecab-ko-dic')}
+    api_dict = {1:Kkma(), 2:Twitter(), 3:Hannanum(), 4:Komoran(), 5:Mecab()}

     module = api_dict[api_num]

@@ -26,10 +26,6 @@ def nlp_apis(api_num, sen):

 def nouns_extractor(title, url, nlpapi_num=5):
     title_nouns, _ = nlp_apis(nlpapi_num, title)
-    try:
-        body_nouns, _ = nlp_apis(nlpapi_num, spider(url))
-
-    except Exception as e:
-        print("::body nouns ERROR::\n", e)
+    body_nouns, _ = nlp_apis(nlpapi_num, spider(url))

     return title_nouns, body_nouns
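Note: a minimal sketch of the tagger lookup nlp_apis() performs; with this change Mecab() resolves mecab-ko-dic from konlpy's default location rather than the hard-coded path (the sample sentence is a placeholder):

    from konlpy.tag import Hannanum, Kkma, Komoran, Mecab, Twitter

    api_dict = {1: Kkma(), 2: Twitter(), 3: Hannanum(), 4: Komoran(), 5: Mecab()}
    print(api_dict[3].nouns("자연어 처리 예시 문장입니다"))   # Hannanum noun extraction

Building the dict instantiates all five taggers on every call; a dict of constructors, e.g. {3: Hannanum}, instantiated on demand would avoid that overhead, though that is outside this PR.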
74 changes: 51 additions & 23 deletions NLP/src/receiveMessageFromSQS.py
@@ -1,30 +1,58 @@
 import boto3
+from boto3.dynamodb.conditions import Key, Attr
+from ast import literal_eval
+
+from body import *
+from getTagList import *
+from nounExtractor import *
+from preprocessing import *

 # Create SQS client
 sqs = boto3.client('sqs', region_name='ap-northeast-2')

 queue_url = 'https://sqs.ap-northeast-2.amazonaws.com/616448378550/newQueue'
+dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-2')
+table = dynamodb.Table('UrlTag')

-# Receive message from SQS queue
-response = sqs.receive_message(
-    QueueUrl=queue_url,
-    AttributeNames=[
-        'SentTimestamp'
-    ],
-    MaxNumberOfMessages=1,
-    MessageAttributeNames=[
-        'All'
-    ],
-    VisibilityTimeout=0,
-    WaitTimeSeconds=0
-)
-
-message = response['Messages'][0]
-receipt_handle = message['ReceiptHandle']
-
-# Delete received message from queue
-sqs.delete_message(
-    QueueUrl=queue_url,
-    ReceiptHandle=receipt_handle
-)
-print('Received and deleted message: %s' % message)
+while 1:
+    # Receive message from SQS queue
+    response = sqs.receive_message(
+        QueueUrl=queue_url,
+        AttributeNames=[
+            'SentTimestamp'
+        ],
+        MaxNumberOfMessages=1,
+        MessageAttributeNames=[
+            'All'
+        ],
+        VisibilityTimeout=0,
+        WaitTimeSeconds=0
+    )
+
+    try:
+        message = response['Messages'][0]
+        receipt_handle = message['ReceiptHandle']
+        body = literal_eval(message["Body"])
+
+        url = body["url"]
+        title = body["title"]
+
+        title_nouns, body_nouns = nouns_extractor(title, url)
+        TF = TF_score(title_nouns, body_nouns)
+        tags = Total_score(TF, seletAllTagsFromDB(), alpha=1, beta=1, test=False)
+
+        response = table.put_item(
+            Item={
+                'url': url,
+                'tags': tags
+            }
+        )
+
+        # Delete received message from queue
+        sqs.delete_message(
+            QueueUrl=queue_url,
+            ReceiptHandle=receipt_handle
+        )
+        print('Received and deleted message: %s' % body["url"])
+    except:
+        pass
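Note: for reference, a minimal sketch of the consume-process-delete loop this worker now implements. Two deliberate deviations, assumptions rather than part of the diff: long polling (WaitTimeSeconds=20) instead of the diff's WaitTimeSeconds=0 busy loop, and a logged exception instead of the bare except: pass.

    import boto3
    from ast import literal_eval

    sqs = boto3.client('sqs', region_name='ap-northeast-2')
    queue_url = 'https://sqs.ap-northeast-2.amazonaws.com/616448378550/newQueue'

    while True:
        response = sqs.receive_message(
            QueueUrl=queue_url,
            MaxNumberOfMessages=1,
            WaitTimeSeconds=20,   # long polling: fewer empty receives
        )
        for message in response.get('Messages', []):
            try:
                body = literal_eval(message['Body'])   # queue body is a Python-literal dict
                print('processing', body['url'])       # tagging + DynamoDB write go here
                sqs.delete_message(
                    QueueUrl=queue_url,
                    ReceiptHandle=message['ReceiptHandle'],
                )
            except Exception as e:
                print('::SQS ERROR::\n', e)            # message will become visible again

Also worth noting: with VisibilityTimeout=0 as in the diff, a received message stays visible to other consumers while it is being processed, so running more than one worker can tag the same URL twice.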
27 changes: 27 additions & 0 deletions NLP/src/requirements.txt
@@ -0,0 +1,27 @@
+beautifulsoup4==4.8.1
+blinker==1.4
+boto3==1.10.14
+botocore==1.13.14
+bs4==0.0.1
+certifi==2019.9.11
+Click==7.0
+docutils==0.15.2
+Flask==1.1.1
+Flask-Cors==3.0.8
+itsdangerous==1.1.0
+Jinja2==2.10.3
+jmespath==0.9.4
+JPype1==0.7.0
+konlpy==0.5.1
+lxml==4.4.1
+MarkupSafe==1.1.1
+mecab-python===0.996-ko-0.9.2
+nltk==3.4.5
+python-dateutil==2.8.0
+s3transfer==0.2.1
+sentry-sdk==0.13.2
+six==1.13.0
+soupsieve==1.9.5
+textblob==0.15.3
+urllib3==1.25.6
+Werkzeug==0.16.0