-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #120 from raumonmar1/Task-064
SpamDetector added
- Loading branch information
Showing
3 changed files
with
73 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package acme.components; | ||
|
||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import acme.entities.systemConfiguration.SystemConfiguration; | ||
|
||
public class SpamDetector { | ||
|
||
public static Boolean isSpam(final String text, final SystemConfiguration systemConfiguration) { | ||
boolean result = false; | ||
|
||
final List<String> wordsChecking = SpamDetector.getWords(text); | ||
final Map<String,Double> enTuples = SpamDetector.getSpamWords(systemConfiguration.getSpamTuplesEn()); | ||
final Map<String,Double> esTuples = SpamDetector.getSpamWords(systemConfiguration.getSpamTuplesEs()); | ||
final Map<String,Double> spamTerms = new HashMap<>(); | ||
spamTerms.putAll(enTuples); | ||
spamTerms.putAll(esTuples); | ||
|
||
final Double spamThreshold = systemConfiguration.getSpamThreshold(); | ||
|
||
final Double spamRatio = SpamDetector.spam(wordsChecking, spamTerms); | ||
|
||
if(spamRatio >= spamThreshold) { | ||
result = true; | ||
} | ||
|
||
return result; | ||
} | ||
|
||
private static List<String> getWords(final String originalText){ | ||
return Arrays.asList(originalText.replaceAll("[.,:;/*=|()¡!¿?{}`´<>]"," ").replace("\""," ").replace("\\"," ") | ||
.trim().split("\\s+")); | ||
} | ||
|
||
private static Map<String,Double> getSpamWords(final String spamTuples){ | ||
final Map<String,Double> spamWords = new HashMap<String,Double>(); | ||
|
||
for(final String keyValue : spamTuples.split(",")) { | ||
final String[] pair = keyValue.replace("("," ").replace(")"," ").replace("'", "").trim().split(":"); | ||
spamWords.put(pair[0], Double.valueOf(pair[1])); | ||
} | ||
|
||
return spamWords; | ||
} | ||
|
||
private static Double spam(final List<String> words, final Map<String,Double> spamTerms) { | ||
Double spamWeight = 0.; | ||
Integer palabrasDobles = 0; | ||
String palabraAnterior= ""; | ||
|
||
for(final String word: words) { | ||
if(spamTerms.keySet().contains(word.toLowerCase())) { | ||
spamWeight += spamTerms.get(word.toLowerCase()); | ||
} | ||
if(spamTerms.keySet().contains(palabraAnterior.toLowerCase() + " " + word.toLowerCase())) { | ||
spamWeight += spamTerms.get(palabraAnterior.toLowerCase() + " " + word.toLowerCase()); | ||
palabrasDobles += 1; | ||
} | ||
palabraAnterior = word; | ||
} | ||
|
||
return (spamWeight/(words.size() - 1 * palabrasDobles)) * 10; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
src/main/webapp/WEB-INF/resources/initial-data/system-configuration.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
key,accepted-currencies,system-currency,spam-tuples,spam-threshold,money-exchange-name,money-exchange-link | ||
system-configuration-01,"EUR,USD,GBP",EUR,"(sex,0.10),(sexo,0.10),(viagra,0.10),(cialis,0.10),(hard core,0.10),(muy duro,0.10),(sexy,0.05),(nigeria,0.05),(you've won,0.05),(has ganado,0.05),(one million,0.05),(un millon,0.05)",0.10,Exchange rate API,https://exchangerate.host | ||
key,accepted-currencies,system-currency,spam-tuples-en,spam-tuples-es,spam-threshold,money-exchange-name,money-exchange-link | ||
system-configuration-01,"EUR,USD,GBP",EUR,"(sex,0.10),(viagra,0.10),(cialis,0.10),(hard core,0.10),(sexy,0.05),(nigeria,0.05),(you've won,0.05),(one million,0.05)","(sexo,0.10),(viagra,0.10),(cialis,0.10),(muy duro,0.10),(sexy,0.05),(nigeria,0.05),(has ganado,0.05),(un millon,0.05)",0.10,Exchange rate API,https://exchangerate.host |