-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeywords.js
89 lines (74 loc) · 2.89 KB
/
keywords.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
const axios = require('axios');
const qs = require('qs');
//Part-of-speech tags treated as visually descriptive words
//NN: noun, NNS: plural noun, NNP: proper noun, JJ: adjective
const VISUAL_PARTS_OF_SPEECH = new Set(['NN', 'NNS', 'NNP', 'JJ']);

//Common filler words that are never kept as keywords
const COMMON_WORDS = new Set(['i', 'we', 'and', 'but', 'the', 'a', 'to', 'my', 'in', 'at', 'on']);

//Returns the value unchanged when it is an array, otherwise an empty array
function toArray(value) {
  return Array.isArray(value) ? value : [];
}

//Checks that a word object carries a non-blank string token tagged as a noun or adjective
function isValidVisualWord(wordObj) {
  return Boolean(wordObj)
    && typeof wordObj.token === 'string'
    && VISUAL_PARTS_OF_SPEECH.has(wordObj.partOfSpeech)
    && wordObj.token.trim().length > 0;
}

//Checks that a phrase is a string longer than 2 characters and is not a common word
function isImportantVisualPhrase(phrase) {
  if (typeof phrase !== 'string') {
    return false;
  }
  const lowerPhrase = phrase.toLowerCase();
  return lowerPhrase.length > 2 && !COMMON_WORDS.has(lowerPhrase);
}

/**
 * Calls the TextRazor keyword extraction API and collects visual keywords.
 *
 * Keywords are the matched text of every returned entity, followed by every
 * noun/adjective token found in the returned sentences. Phrases of 2 characters
 * or fewer and common filler words are dropped, and exact duplicates are removed
 * while keeping first-seen order.
 *
 * The API key is read from the TEXTRAZOR_API_KEY environment variable.
 *
 * @param {string} text - The text to analyse.
 * @returns {Promise<string[]>} The unique keywords; an empty array when the text
 *   is missing/blank or when the request fails.
 */
async function extractKeywords(text) {
  //Nothing to analyse: skip the network round trip entirely
  if (text == null || (typeof text === 'string' && text.trim().length === 0)) {
    return [];
  }

  const data = qs.stringify({
    'extractors': 'entities,nounPhrases',
    'text': text
  });
  const config = {
    method: 'post',
    maxBodyLength: Infinity,
    url: 'https://api.textrazor.com/',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
      'X-TextRazor-Key': process.env['TEXTRAZOR_API_KEY']
    },
    data: data
  };

  //Capture the request in a try/catch to check for any errors
  try {
    const response = await axios.request(config);
    const result = (response && response.data && response.data.response) || {};
    const visualKeywords = [];

    //Extract entities from the response
    for (const entity of toArray(result.entities)) {
      if (entity && isImportantVisualPhrase(entity.matchedText)) {
        visualKeywords.push(entity.matchedText);
      }
    }

    //Loop through all sentences in the response to extract additional descriptive words
    for (const sentence of toArray(result.sentences)) {
      //A sentence without a usable word list is skipped instead of aborting the
      //whole extraction and losing the keywords gathered so far
      const words = toArray(sentence && sentence.words);
      for (const wordObj of words) {
        if (!isValidVisualWord(wordObj)) {
          continue;
        }
        const token = wordObj.token.trim();
        if (isImportantVisualPhrase(token)) {
          visualKeywords.push(token);
        }
      }
    }

    //Remove duplicate keywords (a Set keeps first-seen order)
    const uniqueVisualKeywords = [...new Set(visualKeywords)];
    //Log the extracted visual keywords to the console for verification
    console.log("Extracted Visual Keywords: ", uniqueVisualKeywords);
    return uniqueVisualKeywords;
  } catch (err) {
    //Best effort: a failed request yields no keywords rather than an exception
    console.log("Error occurred: ", err);
    return [];
  }
}
//Export the extractKeywords function as this module's only CommonJS export,
//so require() of this file yields the function itself
module.exports = extractKeywords;