From 7ee1527804fb526684a61f29bc56280032a68213 Mon Sep 17 00:00:00 2001
From: "B.J. Rossiter" <b.j.rossiter@gmail.com>
Date: Wed, 16 Oct 2013 12:34:17 +1100
Subject: [PATCH] Add a flatten option

---
 README.md | 11 +++++++++++
 index.js  | 19 +++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9742ea9..6310840 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,17 @@ keyword.extract('beep beep and foo bar and beep beep and beep beep and foo bar',
 
 Returns `['beep beep']`.
 
+#### Option: flatten
+
+Returns all occurrences of the ngram. Useful for passing data to Natural's
+TF-IDF function. Note: the original order is not maintained. Off by default.
+
+```js
+keyword.extract('beep beep and foo bar and beep beep and beep beep and foo bar', {flatten: true})
+```
+
+Returns `['beep beep', 'beep beep', 'beep beep', 'foo bar', 'foo bar']`.
+
 #### Option: html
 
 Extracts the keywords from html text elements. The default is false.
diff --git a/index.js b/index.js
index b50407f..e00034c 100644
--- a/index.js
+++ b/index.js
@@ -94,8 +94,23 @@ exports.extract = function(text, options){
     });
   }
 
-  // Return results with scores or without depending on options
-  combined =  options.score ? combined : _.pluck(combined, 'term');
+  if (options.flatten){
+    // Flatten the results so that there is a list item for every occurrence of
+    // the term
+    combined = _.flatten(
+      _.map(combined, function(result){
+        var flattened = [];
+        for (var i=0; i < result.tf; i++){
+          flattened.push(result.term);
+        }
+        return flattened;
+      })
+    );
+  }else{
+    // Return results with scores or without depending on options
+    combined =  options.score ? combined : _.pluck(combined, 'term');
+  }
+
   
   // Limit the results
   if (options.limit){