Merge pull request #1 from FernanOrtega/dev
First beta version
FernanOrtega authored Mar 12, 2020
2 parents 9aa7182 + d45ff7f commit 87d98d3
Showing 9 changed files with 3,112 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/

# PyCharm
.idea
5 changes: 5 additions & 0 deletions requirements.txt
@@ -0,0 +1,5 @@
SpaCy==2.2.3
Flask==1.1.1
gunicorn==20.0.4
numpy==1.18.1
snowballstemmer==2.0.0
151 changes: 151 additions & 0 deletions sentimentanalysis/analizer.py
@@ -0,0 +1,151 @@
from typing import Dict, Optional

import snowballstemmer
import spacy

from spacy.tokens.doc import Doc
from spacy.tokens.span import Span
from spacy.tokens.token import Token
from itertools import groupby

from sentimentanalysis.dataloader import load_dict


class SentimentAnalysis(object):
    def __init__(self, language="es"):
        # The language argument is currently unused here: the pipeline always
        # loads the Spanish es_core_news_md model.
        self.__nlp = spacy.load("es_core_news_md")
        stemmer = StemmerPipe(language)
        annotator = SentimentAnnotatorPipe(language)

        self.__nlp.add_pipe(stemmer)
        self.__nlp.add_pipe(annotator)

def compute_sentiment(self, text: str, language="es") -> Dict:
result = {}
doc = self.__nlp(text)

result["per_sentence_sentiment"] = self.__compute_per_sentence_sentiment(doc)
result["global_sentiment"] = self.__compute_global_sentiment(doc)

return result

def __compute_per_sentence_sentiment(self, doc: Doc) -> Dict:

result = {}

for i, sent in enumerate(doc.sents):
max_score = 0.0
min_score = 0.0

            for token in sent:
                # Keep the strongest positive and the strongest negative token
                # scores; a booster pushes a non-zero score further from zero.
                score = token._.sentiment_weight * token._.negation_weight
                if score > 0:
                    score = max(1.0, score + token._.booster_weight)
                    if score > max_score:
                        max_score = score
                elif score < 0:
                    score = min(-1.0, score - token._.booster_weight)
                    if score < min_score:
                        min_score = score

sentence_score = max_score + min_score
sent._.sentiment_weight = sentence_score
result[str(sent)] = sentence_score

return result

def __compute_global_sentiment(self, doc: Doc) -> float:

max_score = 0.0
min_score = 0.0

for sent in doc.sents:
if sent._.sentiment_weight > max_score:
max_score = sent._.sentiment_weight
elif sent._.sentiment_weight < min_score:
min_score = sent._.sentiment_weight

return max_score + min_score


class SentimentAnnotatorPipe(object):
def __init__(self, language: str = "es"):
self.__sentiment_words = load_dict(language, "sentiment_words.csv")
self.__boosters = load_dict(language, "boosters.csv")
self.__negations = load_dict(language, "negations.csv")
Span.set_extension("sentiment_weight", default=0.0, force=True)
Token.set_extension("sentiment_weight", default=0.0, force=True)
Token.set_extension("negation_weight", default=1.0, force=True)
Token.set_extension("booster_weight", default=0.0, force=True)

def __call__(self, doc: Doc) -> Doc:
self.__annotate_sentiment_words(doc)
self.__annotate_negations_and_boosters(doc)

return doc

def __annotate_sentiment_words(self, doc: Doc) -> None:
for token in doc:
if token.pos_ == "ADJ" and not token.is_stop:
sentiment_weight = self.__sentiment_words.get(token._.stem, 0.0)
if sentiment_weight != 0.0:
token._.booster_weight = self.__get_self_boosters(token)
token._.sentiment_weight = sentiment_weight

    def __annotate_negations_and_boosters(self, doc: Doc) -> None:
        for sentence in doc.sents:
            for i, token in enumerate(sentence):
                # The dictionaries are keyed by stems, so look up the token's
                # stem rather than the Token object itself.
                if token._.stem in self.__negations:
                    influenced_token = self.__get_influenced_token(sentence, i)
                    if influenced_token:
                        influenced_token._.negation_weight = (
                            self.__negations.get(token._.stem) * -1
                        )
                elif token._.stem in self.__boosters:
                    influenced_token = self.__get_influenced_token(sentence, i)
                    if influenced_token:
                        influenced_token._.booster_weight += self.__boosters.get(
                            token._.stem
                        )

    def __get_influenced_token(
        self, sentence: Span, influencer_index: int
    ) -> Optional[Token]:
        # Search outwards from the influencer and return the nearest
        # sentiment-bearing token; returning immediately prevents a farther
        # match from overwriting a nearer one.
        for i in range(1, len(sentence)):
            for j in [-1, 1]:
                candidate_index = influencer_index + i * j
                if 0 <= candidate_index < len(sentence):
                    candidate = sentence[candidate_index]

                    if candidate._.sentiment_weight != 0.0:
                        return candidate

        return None

    def __get_self_boosters(self, token: Token) -> float:
        # Treat mostly-uppercase or letter-stretched tokens ("GENIAL",
        # "buenooo") as self-boosted. The uppercase ratio is computed on the
        # raw text because token.shape_ truncates same-class runs after four
        # characters, which would hide long all-caps words.
        upper_ratio = sum(1 for char in token.text if char.isupper()) / len(token)

        return (
            1.0 if upper_ratio > 0.8 or self.__max_rep_letters(token) >= 3 else 0.0
        )

    def __max_rep_letters(self, token: Token) -> int:
        # Length of the longest run of a single repeated character,
        # e.g. 3 for "buenooo".
        return max(len(list(group)) for _, group in groupby(token.lower_))


class StemmerPipe(object):
    def __init__(self, language="es"):
        # Only a Spanish stemmer is instantiated, regardless of the language
        # argument. force=True matches the other extensions and avoids an
        # error if the pipeline is constructed more than once.
        self.__stemmer = snowballstemmer.stemmer("spanish")
        Token.set_extension("stem", default="", force=True)

def __call__(self, doc: Doc) -> Doc:
for token in doc:
token._.stem = self.__stemmer.stemWord(token.lemma_)

return doc
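
For reviewers, a minimal usage sketch of the new pipeline (illustrative, not part of this diff; it assumes the es_core_news_md model has been downloaded, e.g. with python -m spacy download es_core_news_md, which is not covered by requirements.txt):

# Usage sketch (illustrative only, not part of this commit).
from sentimentanalysis.analizer import SentimentAnalysis

analyzer = SentimentAnalysis()
result = analyzer.compute_sentiment("La película es muy buena.")
print(result["per_sentence_sentiment"])  # {sentence text: score}
print(result["global_sentiment"])        # > 0 positive, < 0 negative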
65 changes: 65 additions & 0 deletions sentimentanalysis/api.py
@@ -0,0 +1,65 @@
import json
from typing import Union

from flask import Flask, request, Response
from werkzeug.exceptions import HTTPException, InternalServerError

from sentimentanalysis.analizer import SentimentAnalysis


def get_response(body: Union[str, dict], status: int = 200):
json_body = {("result" if status == 200 else "error"): body}

return Response(json.dumps(json_body), status, mimetype="application/json")


app = Flask(__name__)
sent_analysis = SentimentAnalysis()


@app.errorhandler(Exception)
def handle_exception(e):
# pass through HTTP errors
if isinstance(e, HTTPException):
return e

# now you're handling non-HTTP exceptions only
return get_response(f"500. HTTP Exception. Exception: {e}", 500)


@app.errorhandler(InternalServerError)
def handle_500(e):
original = getattr(e, "original_exception", None)

if original is None:
return get_response("500. Unhandled Internal Server Error", 500)

# wrapped unhandled error
return get_response(f"500. Handled Internal Server Error: {original}", 500)


@app.route("/sentimentanalysis", methods=["POST"])
def get_sentiment_analysis():
if not request.is_json:
message = "Incorrect mimetype, must be 'application/json'."
status_code = 415
else:
request_body = request.get_json()
if "text" not in request_body:
message = "'text' attribute not present in request body"
status_code = 422
else:
text = request_body["text"]
status_code = 200
message = sent_analysis.compute_sentiment(text)

    return get_response(message, status_code)


@app.route("/")
def root():
return "Everything is working fine"


if __name__ == "__main__":
app.run(host="0.0.0.0", port=5000)
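
A hypothetical client call against a locally running instance, sketched with the requests library (an assumption: it is not among the pinned dependencies):

# Illustrative client call; assumes the service is running on localhost:5000
# and that the requests library is installed.
import requests

response = requests.post(
    "http://localhost:5000/sentimentanalysis",
    json={"text": "El servicio funciona muy bien."},
)
print(response.status_code)  # 200 on success, 415/422 for malformed requests
print(response.json())       # {"result": ...} on success, {"error": ...} otherwise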
21 changes: 21 additions & 0 deletions sentimentanalysis/dataloader.py
@@ -0,0 +1,21 @@
import csv
import os
from typing import Dict, List

BASE_URL = f"{os.path.dirname(os.path.realpath(__file__))}/resources"


def load_dict(language: str, file_name: str) -> Dict:

with open(os.path.join(BASE_URL, language, file_name)) as csvfile:
csv_reader = csv.reader(csvfile)
result = {row[0]: float(row[1]) for row in csv_reader}

return result


def load_string_file(language: str, file_name: str) -> List:
with open(os.path.join(BASE_URL, language, file_name)) as str_file:
result = [line.replace("\n", "") for line in str_file]

return result
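
Both loaders expect per-language files under resources/<language>/; load_dict reads two-column (term, weight) CSV rows, as the Spanish resources below illustrate. A quick sketch:

# Illustrative only; the value is taken from the boosters.csv added here.
from sentimentanalysis.dataloader import load_dict

boosters = load_dict("es", "boosters.csv")
print(boosters["muy"])  # 1.0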
6 changes: 6 additions & 0 deletions sentimentanalysis/resources/es/boosters.csv
@@ -0,0 +1,6 @@
muy,1.0
much,1.0
bastant,1.0
demasi,1.0
mas,1.0
gran,1.0
8 changes: 8 additions & 0 deletions sentimentanalysis/resources/es/negations.csv
@@ -0,0 +1,8 @@
nunc,1.0
jamas,1.0
no,1.0
poc,1.0
sin,1.0
sin embarg,1.0
nad de,1.0
no signif que,1.0