From 7ee1527804fb526684a61f29bc56280032a68213 Mon Sep 17 00:00:00 2001 From: "B.J. Rossiter" Date: Wed, 16 Oct 2013 12:34:17 +1100 Subject: [PATCH] Add a flatten option --- README.md | 11 +++++++++++ index.js | 19 +++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9742ea9..6310840 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,17 @@ keyword.extract('beep beep and foo bar and beep beep and beep beep and foo bar', Returns `['beep beep']`. +#### Option: flatten + +Returns all occurrences of the ngram. Useful for passing data to Natural's +TF-IDF function. Note: the original order is not maintained. Off by default. + +```js +keyword.extract('beep beep and foo bar and beep beep and beep beep and foo bar', {flatten: true}) +``` + +Returns `['beep beep', 'beep beep', 'beep beep', 'foo bar', 'foo bar']`. + #### Option: html Extracts the keywords from html text elements. The default is false. diff --git a/index.js b/index.js index b50407f..e00034c 100644 --- a/index.js +++ b/index.js @@ -94,8 +94,23 @@ exports.extract = function(text, options){ }); } - // Return results with scores or without depending on options - combined = options.score ? combined : _.pluck(combined, 'term'); + if (options.flatten){ + // Flatten the results so that there is a list item for every occurence of + // the term + combined = _.flatten( + _.map(combined, function(result){ + var flattened = []; + for (var i=0; i < result.tf; i++){ + flattened.push(result.term); + } + return flattened; + }) + ); + }else{ + // Return results with scores or without depending on options + combined = options.score ? combined : _.pluck(combined, 'term'); + } + // Limit the results if (options.limit){