Skip to content

Commit

Permalink
fix(corpus): return object instead of array, make second argument optional
Browse files Browse the repository at this point in the history
  • Loading branch information
ajmacdonald committed Jan 16, 2025
1 parent 436a8b2 commit 75c3495
Show file tree
Hide file tree
Showing 4 changed files with 361 additions and 174 deletions.
33 changes: 27 additions & 6 deletions src/corpus.js
Original file line number Diff line number Diff line change
Expand Up @@ -1328,20 +1328,41 @@ class Corpus {
}

/**
- * Returns a list of corpus terms, filtered by the provided category.
+ * Given a Categories instance or ID, returns an object mapping category names to corpus terms. The results can be limited to specific category names by providing one or more of them.
* @param {String|Spyral.Categories} categories A categories ID or a Spyral.Categories instance.
- * @param {String} categoryName The name of the category within the instance.
- * @returns {Promise<Array>}
+ * @param {String|Array<String>} [categoryName] One or more names of categories within the instance.
+ * @returns {Promise<Object>}
*/
async filterByCategory(categories, categoryName) {
if (categories === undefined) return;
- if (categoryName === undefined) return;

if (categories instanceof Categories === false) {
categories = await Categories.load(categories);
}
- const catTerms = categories.getCategoryTerms(categoryName);
- return this.terms({whiteList: catTerms});
+
+ let categoryNames = [];
+
+ if (categoryName === undefined) {
+ categoryNames = categories.getCategoryNames();
+ } else if (Util.isString(categoryName)) {
+ categoryNames = [categoryName];
+ } else {
+ categoryNames = categoryName;
+ }
+
+ const termsResults = await Promise.all(
+ categoryNames.map(key => {
+ let catTerms = categories.getCategoryTerms(key);
+ return this.terms({whiteList: catTerms});
+ })
+ );
+
+ let results = {};
+ termsResults.forEach((terms, i) => {
+ results[categoryNames[i]] = terms;
+ });
+
+ return results;
}

/**
Expand Down
10 changes: 8 additions & 2 deletions test/corpus.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,15 @@ test('filterByCategory', async () => {
const corpus = await Corpus.load(corpusId);
fetch.once(JSON.stringify(MocksCategories.Categories));
const categories = await Categories.load('categories.en.txt');
- fetch.once(JSON.stringify(Mocks.TermsWhiteList));
+ fetch.once(JSON.stringify(MocksCategories.TermsWhiteListPositive));
const data = await corpus.filterByCategory(categories, 'positive');
- expect(data.length).toBe(22);
+ expect(data.positive.length).toBe(22);
+ fetch
+ .once(JSON.stringify(MocksCategories.Categories))
+ .once(JSON.stringify(MocksCategories.TermsWhiteListPositive))
+ .once(JSON.stringify(MocksCategories.TermsWhiteListNegative));
+ const data2 = await corpus.filterByCategory('categories.en.txt');
+ expect(data2.negative.length).toBe(21);
})

test('tool', () => {
Expand Down
326 changes: 326 additions & 0 deletions test/mocks/categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,329 @@ export const Categories = {
}`
}
}


/**
 * Mock terms payload used for the "positive" category.
 * The filterByCategory test queues it with fetch.once(JSON.stringify(...)) and
 * expects 22 terms back for `positive`.
 * Shape: `corpusTerms.total` is 22 and `corpusTerms.terms` holds 22 entries, each
 * with `term`, `inDocumentsCount`, `rawFreq`, `relativeFreq` and
 * `comparisonRelativeFreqDifference` (always null here).
 * NOTE(review): presumably a captured server response — keep values verbatim.
 */
export const TermsWhiteListPositive = {
"version": "5.7",
"voyantVersion": "2025-01-14",
"voyantBuild": "",
"duration": 3,
"corpusTerms": {
"total": 22,
"terms": [
{
"term": "good",
"inDocumentsCount": 8,
"rawFreq": 1444,
"relativeFreq": 0.0018471071,
"comparisonRelativeFreqDifference": null
},
{
"term": "hope",
"inDocumentsCount": 8,
"rawFreq": 649,
"relativeFreq": 8.301749E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "happy",
"inDocumentsCount": 8,
"rawFreq": 562,
"relativeFreq": 7.1888795E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "happiness",
"inDocumentsCount": 8,
"rawFreq": 396,
"relativeFreq": 5.065474E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "advantage",
"inDocumentsCount": 7,
"rawFreq": 166,
"relativeFreq": 2.1234057E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "praise",
"inDocumentsCount": 8,
"rawFreq": 131,
"relativeFreq": 1.6756996E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "excellent",
"inDocumentsCount": 8,
"rawFreq": 129,
"relativeFreq": 1.6501165E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "superior",
"inDocumentsCount": 8,
"rawFreq": 128,
"relativeFreq": 1.6373249E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "confidence",
"inDocumentsCount": 8,
"rawFreq": 103,
"relativeFreq": 1.3175349E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "safe",
"inDocumentsCount": 7,
"rawFreq": 80,
"relativeFreq": 1.0233281E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "success",
"inDocumentsCount": 8,
"rawFreq": 64,
"relativeFreq": 8.1866245E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "enjoy",
"inDocumentsCount": 8,
"rawFreq": 57,
"relativeFreq": 7.2912124E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "wonderful",
"inDocumentsCount": 8,
"rawFreq": 48,
"relativeFreq": 6.139968E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "joyful",
"inDocumentsCount": 6,
"rawFreq": 19,
"relativeFreq": 2.4304041E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "fun",
"inDocumentsCount": 3,
"rawFreq": 15,
"relativeFreq": 1.91874E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "amazing",
"inDocumentsCount": 5,
"rawFreq": 14,
"relativeFreq": 1.790824E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "freedom",
"inDocumentsCount": 6,
"rawFreq": 13,
"relativeFreq": 1.6629081E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "enthusiasm",
"inDocumentsCount": 3,
"rawFreq": 9,
"relativeFreq": 1.151244E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "magnificent",
"inDocumentsCount": 3,
"rawFreq": 8,
"relativeFreq": 1.0233281E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "win",
"inDocumentsCount": 4,
"rawFreq": 4,
"relativeFreq": 5.1166403E-6,
"comparisonRelativeFreqDifference": null
},
{
"term": "bliss",
"inDocumentsCount": 2,
"rawFreq": 2,
"relativeFreq": 2.5583201E-6,
"comparisonRelativeFreqDifference": null
},
{
"term": "optimistic",
"inDocumentsCount": 0,
"rawFreq": 0,
"relativeFreq": 0.0,
"comparisonRelativeFreqDifference": null
}
]
}
}

/**
 * Mock terms payload used for the "negative" category.
 * The filterByCategory test queues it with fetch.once(JSON.stringify(...)) and
 * expects 21 terms back for `negative`.
 * Shape: `corpusTerms.total` is 21 and `corpusTerms.terms` holds 21 entries, each
 * with `term`, `inDocumentsCount`, `rawFreq`, `relativeFreq` and
 * `comparisonRelativeFreqDifference` (always null here).
 * NOTE(review): presumably a captured server response — keep values verbatim.
 */
export const TermsWhiteListNegative = {
"version": "5.7",
"voyantVersion": "2.6.0",
"voyantBuild": "",
"duration": 7,
"corpusTerms": {
"total": 21,
"terms": [
{
"term": "bad",
"inDocumentsCount": 8,
"rawFreq": 182,
"relativeFreq": 2.3280713E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "concern",
"inDocumentsCount": 8,
"rawFreq": 116,
"relativeFreq": 1.4838256E-4,
"comparisonRelativeFreqDifference": null
},
{
"term": "suffering",
"inDocumentsCount": 8,
"rawFreq": 76,
"relativeFreq": 9.7216165E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "suffer",
"inDocumentsCount": 8,
"rawFreq": 67,
"relativeFreq": 8.570372E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "sad",
"inDocumentsCount": 8,
"rawFreq": 63,
"relativeFreq": 8.058708E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "inferior",
"inDocumentsCount": 8,
"rawFreq": 51,
"relativeFreq": 6.523716E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "horror",
"inDocumentsCount": 7,
"rawFreq": 38,
"relativeFreq": 4.8608083E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "fail",
"inDocumentsCount": 7,
"rawFreq": 32,
"relativeFreq": 4.0933122E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "despair",
"inDocumentsCount": 8,
"rawFreq": 30,
"relativeFreq": 3.83748E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "hesitation",
"inDocumentsCount": 6,
"rawFreq": 24,
"relativeFreq": 3.069984E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "desperate",
"inDocumentsCount": 6,
"rawFreq": 17,
"relativeFreq": 2.174572E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "disadvantage",
"inDocumentsCount": 7,
"rawFreq": 16,
"relativeFreq": 2.0466561E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "terrible",
"inDocumentsCount": 3,
"rawFreq": 15,
"relativeFreq": 1.91874E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "forbidden",
"inDocumentsCount": 4,
"rawFreq": 11,
"relativeFreq": 1.4070761E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "destroy",
"inDocumentsCount": 6,
"rawFreq": 10,
"relativeFreq": 1.2791601E-5,
"comparisonRelativeFreqDifference": null
},
{
"term": "sadness",
"inDocumentsCount": 3,
"rawFreq": 6,
"relativeFreq": 7.67496E-6,
"comparisonRelativeFreqDifference": null
},
{
"term": "failure",
"inDocumentsCount": 5,
"rawFreq": 6,
"relativeFreq": 7.67496E-6,
"comparisonRelativeFreqDifference": null
},
{
"term": "depression",
"inDocumentsCount": 3,
"rawFreq": 4,
"relativeFreq": 5.1166403E-6,
"comparisonRelativeFreqDifference": null
},
{
"term": "disaster",
"inDocumentsCount": 1,
"rawFreq": 1,
"relativeFreq": 1.2791601E-6,
"comparisonRelativeFreqDifference": null
},
{
"term": "pessimistic",
"inDocumentsCount": 0,
"rawFreq": 0,
"relativeFreq": 0.0,
"comparisonRelativeFreqDifference": null
},
{
"term": "criticize",
"inDocumentsCount": 0,
"rawFreq": 0,
"relativeFreq": 0.0,
"comparisonRelativeFreqDifference": null
}
]
}
}
Loading

0 comments on commit 75c3495

Please sign in to comment.