diff --git a/.gitignore b/.gitignore
index 3a7b930..7e9868f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,3 +179,9 @@ downloads
 processed
 untarred
 countries
+en_filtered
+
+all_text.txt
+*.json
+*.pkl
+untarred_failed.txt
diff --git a/countryfier_light.py b/countryfier_light.py
new file mode 100644
index 0000000..5fc80c7
--- /dev/null
+++ b/countryfier_light.py
@@ -0,0 +1,13 @@
+import json
+import os
+
+if __name__ == '__main__':
+    with open('all_with_sentiments.json') as source:
+        all_tweets = json.load(source)
+    # Bucket every tweet under its country code, keeping the input order.
+    by_country = {}
+    for entry in all_tweets:
+        code = entry['country_code']
+        by_country.setdefault(code, []).append(entry)
+    with open('all_by_countries.json', 'w') as target:
+        json.dump(by_country, target)
diff --git a/notes.ipynb b/notes.ipynb
index 5ab2073..d5f393c 100644
--- a/notes.ipynb
+++ b/notes.ipynb
@@ -20,9 +20,11 @@
     "\n",
     "45253479 characters for Feb and 637787 tweets for Feb. Meaning per message 70 characters\n",
     "\n",
-    "6250000\n",
+    "49.9 G of geo enabled data\n",
     "\n",
-    "49.9 G of geo enabled data"
+    "5554059 tweets to analyse\n",
+    "\n",
+    "Estimated 10 h for Wolfram to analyse all sentiments"
    ],
    "metadata": {
     "collapsed": false
diff --git a/sentiment_analyser.nb b/sentiment_analyser.nb
new file mode 100644
index 0000000..fc14075
--- /dev/null
+++ b/sentiment_analyser.nb
@@ -0,0 +1,133 @@
+(* Content-type: application/vnd.wolfram.mathematica *)
+
+(*** Wolfram Notebook File ***)
+(* http://www.wolfram.com/nb *)
+
+(* CreatedBy='WolframDesktop 12.0' *)
+
+(*CacheID: 234*)
+(* Internal cache information:
+NotebookFileLineBreakTest
+NotebookFileLineBreakTest
+NotebookDataPosition[       161,          7]
+NotebookDataLength[      4851,        125]
+NotebookOptionsPosition[      3668,         99]
+NotebookOutlinePosition[      4008,        114]
+CellTagsIndexPosition[      3965,        111]
+WindowFrame->Normal*)
+
+(* Beginning of Notebook Content *)
+Notebook[{
+Cell[BoxData[
+ RowBox[{
+  RowBox[{"SetDirectory", "[", 
+   RowBox[{"NotebookDirectory", "[", "]"}], "]"}], ";"}]], "Input",
+ CellChangeTimes->{{3.8018292554418917`*^9, 3.801829268194729*^9}},
+ CellLabel->"In[3]:=",ExpressionUUID->"7e50b431-c814-43b3-8a70-47b3f510a34f"],
+
+Cell[BoxData[
+ RowBox[{
+  RowBox[{"tweets", "=", 
+   RowBox[{"Import", "[", 
+    RowBox[{"\"\<all.json\>\"", ",", "\"\<RawJSON\>\""}], "]"}]}], 
+  ";"}]], "Input",
+ CellChangeTimes->{{3.801829228874784*^9, 3.801829254017221*^9}, {
+   3.80182939434164*^9, 3.801829401467689*^9}, 3.8018304300215607`*^9, {
+   3.801831083869722*^9, 3.801831084036481*^9}},
+ CellLabel->"In[53]:=",ExpressionUUID->"8b5b0fcf-c0e1-4988-81e7-69b45f192640"],
+
+Cell[BoxData[
+ RowBox[{
+  RowBox[{"getSentiment", ":=", 
+   RowBox[{
+    RowBox[{"Classify", "[", 
+     RowBox[{"\"\<Sentiment\>\"", ",", 
+      RowBox[{"#", "[", 
+       RowBox[{"[", "\"\<text\>\"", "]"}], "]"}], ",", 
+      "\"\<Probabilities\>\""}], "]"}], "&"}]}], ";"}]], "Input",
+ CellChangeTimes->{{3.801829403348021*^9, 3.801829424402649*^9}, {
+   3.80182948481728*^9, 3.8018295571106853`*^9}, {3.8018297704071703`*^9, 
+   3.8018297721097307`*^9}, {3.8018299109776363`*^9, 3.801829913584139*^9}, {
+   3.801829960631764*^9, 3.801829969853791*^9}, {3.801830124089261*^9, 
+   3.801830125735715*^9}, 3.8018304526045523`*^9},
+ CellLabel->"In[49]:=",ExpressionUUID->"97cdedea-0864-4034-b766-46b519d70639"],
+
+Cell[BoxData[
+ RowBox[{
+  RowBox[{"sentiments", "=", 
+   RowBox[{"getSentiment", "/@", "tweets"}]}], ";"}]], "Input",
+ CellChangeTimes->{{3.80183014636773*^9, 3.801830180606152*^9}},
+ CellLabel->"In[40]:=",ExpressionUUID->"ae7e2788-419f-4be6-9f2a-be1d0da9426b"],
+
+Cell[CellGroupData[{
+
+Cell[BoxData[
+ RowBox[{"Export", "[", 
+  RowBox[{"\"\<sentiments.json\>\"", ",", "sentiments"}], "]"}]], "Input",
+ CellChangeTimes->{{3.801830165287106*^9, 3.801830216629527*^9}, {
+  3.801830318257956*^9, 3.80183033794354*^9}},
+ CellLabel->"In[48]:=",ExpressionUUID->"0acc16de-62e8-484b-9e46-3d58283d42b4"],
+
+Cell[BoxData["\<\"sentiments.json\"\>"], "Output",
+ CellChangeTimes->{{3.8018301897362432`*^9, 3.80183021764944*^9}, {
+  3.801830325411562*^9, 3.801830338261691*^9}},
+ CellLabel->"Out[48]=",ExpressionUUID->"bfbbe786-961e-479c-a40b-22f2eeae4a3e"]
+}, Open  ]],
+
+Cell[BoxData[
+ RowBox[{"sentiments", ";"}]], "Input",
+ CellChangeTimes->{{3.8018301987179413`*^9, 3.801830200109646*^9}, {
+  3.8018302662198677`*^9, 3.801830269690198*^9}},
+ CellLabel->"In[46]:=",ExpressionUUID->"43846aa9-a21d-4f9e-91a6-2976cfa0bd45"],
+
+Cell[CellGroupData[{
+
+Cell[BoxData[
+ RowBox[{"Length", "[", "tweets", "]"}]], "Input",
+ CellChangeTimes->{{3.801831146909433*^9, 3.80183115134906*^9}},
+ CellLabel->"In[54]:=",ExpressionUUID->"625a7efa-f1f3-4427-8efb-08b980b00e07"],
+
+Cell[BoxData["5554059"], "Output",
+ CellChangeTimes->{3.801831151926058*^9},
+ CellLabel->"Out[54]=",ExpressionUUID->"30e2dc34-92e9-40d3-82d7-7c3799d2128f"]
+}, Open  ]],
+
+Cell[BoxData[""], "Input",
+ CellChangeTimes->{{3.801830310465623*^9, 3.801830313016934*^9}, 
+   3.801831144429473*^9},ExpressionUUID->"4ed7d1d3-68aa-457a-a389-\
+ed58fde6ec2f"]
+},
+WindowSize->{808, 911},
+WindowMargins->{{832, Automatic}, {Automatic, 289}},
+FrontEndVersion->"12.0 for Mac OS X x86 (64-bit) (April 11, 2019)",
+StyleDefinitions->"Default.nb"
+]
+(* End of Notebook Content *)
+
+(* Internal cache information *)
+(*CellTagsOutline
+CellTagsIndex->{}
+*)
+(*CellTagsIndex
+CellTagsIndex->{}
+*)
+(*NotebookFileOutline
+Notebook[{
+Cell[561, 20, 269, 5, 30, "Input",ExpressionUUID->"7e50b431-c814-43b3-8a70-47b3f510a34f"],
+Cell[833, 27, 431, 9, 30, "Input",ExpressionUUID->"8b5b0fcf-c0e1-4988-81e7-69b45f192640"],
+Cell[1267, 38, 707, 14, 30, "Input",ExpressionUUID->"97cdedea-0864-4034-b766-46b519d70639"],
+Cell[1977, 54, 261, 5, 30, "Input",ExpressionUUID->"ae7e2788-419f-4be6-9f2a-be1d0da9426b"],
+Cell[CellGroupData[{
+Cell[2263, 63, 306, 5, 30, "Input",ExpressionUUID->"0acc16de-62e8-484b-9e46-3d58283d42b4"],
+Cell[2572, 70, 245, 3, 34, "Output",ExpressionUUID->"bfbbe786-961e-479c-a40b-22f2eeae4a3e"]
+}, Open  ]],
+Cell[2832, 76, 251, 4, 30, "Input",ExpressionUUID->"43846aa9-a21d-4f9e-91a6-2976cfa0bd45"],
+Cell[CellGroupData[{
+Cell[3108, 84, 208, 3, 30, "Input",ExpressionUUID->"625a7efa-f1f3-4427-8efb-08b980b00e07"],
+Cell[3319, 89, 155, 2, 69, "Output",ExpressionUUID->"30e2dc34-92e9-40d3-82d7-7c3799d2128f"]
+}, Open  ]],
+Cell[3489, 94, 175, 3, 30, "Input",ExpressionUUID->"4ed7d1d3-68aa-457a-a389-ed58fde6ec2f"]
+}
+]
+*)
+
diff --git a/sentiment_analyser.wls b/sentiment_analyser.wls
new file mode 100644
index 0000000..f1e8922
--- /dev/null
+++ b/sentiment_analyser.wls
@@ -0,0 +1,5 @@
+#!/usr/bin/env wolframscript
+tweets = Import["all.json", "RawJSON"]; (* read the tweet records from all.json *)
+getSentiment := Classify["Sentiment", #[["text"]], "Probabilities"] &; (* sentiment class probabilities for one tweet's "text" field *)
+sentiments = getSentiment /@ tweets; (* one result per tweet, in the same order as tweets *)
+Export["sentiments.json", sentiments]; (* write the results to sentiments.json *)
diff --git a/sentiment_threader.py b/sentiment_threader.py
new file mode 100644
index 0000000..947883e
--- /dev/null
+++ b/sentiment_threader.py
@@ -0,0 +1,12 @@
+import json
+import progressbar
+
+if __name__ == '__main__':
+    with open('all.json') as tweets_file:
+        tweets = json.load(tweets_file)
+    with open('sentiments.json') as sentiments_file:
+        sentiments = json.load(sentiments_file)
+    for idx, tweet in enumerate(progressbar.progressbar(tweets)):
+        tweet['sentiment'] = sentiments[idx]  # entry idx belongs to tweet idx
+    with open('all_with_sentiments.json', 'w') as out_file:
+        json.dump(tweets, out_file)
diff --git a/statistics_finder.nb b/statistics_finder.nb
new file mode 100644
index 0000000..7e4a401
--- /dev/null
+++ b/statistics_finder.nb
@@ -0,0 +1,43 @@
+(* Content-type: application/vnd.wolfram.mathematica *)
+
+(*** Wolfram Notebook File ***)
+(* http://www.wolfram.com/nb *)
+
+(* CreatedBy='WolframDesktop 12.0' *)
+
+(*CacheID: 234*)
+(* Internal cache information:
+NotebookFileLineBreakTest
+NotebookFileLineBreakTest
+NotebookDataPosition[       161,          7]
+NotebookDataLength[       760,         33]
+NotebookOptionsPosition[       564,         21]
+NotebookOutlinePosition[       903,         36]
+CellTagsIndexPosition[       860,         33]
+WindowFrame->Normal*)
+
+(* Beginning of Notebook Content *)
+Notebook[{
+},
+WindowSize->{808, 911},
+WindowMargins->{{Automatic, 866}, {Automatic, 23}},
+FrontEndVersion->"12.0 for Mac OS X x86 (64-bit) (April 11, 2019)",
+StyleDefinitions->"Default.nb"
+]
+(* End of Notebook Content *)
+
+(* Internal cache information *)
+(*CellTagsOutline
+CellTagsIndex->{}
+*)
+(*CellTagsIndex
+CellTagsIndex->{}
+*)
+(*NotebookFileOutline
+Notebook[{
+}
+]
+*)
+
+(* End of internal cache information *)
+
diff --git a/text_filter.py b/text_filter.py
new file mode 100644
index 0000000..a7a4763
--- /dev/null
+++ b/text_filter.py
@@ -0,0 +1,27 @@
+import re
+import json
+import progressbar
+
+def filter_text(s):
+    """Return s without non-ASCII characters, @mentions, #hashtags and http(s) links."""
+    # delete all the emoji (and any other non-ASCII characters)
+    s = re.sub(r'[^\x00-\x7F]', '', s)
+    # remove every @mention, up to the next whitespace
+    s = re.sub(r'@\S+', '', s)
+    # remove every #hashtag, up to the next whitespace
+    s = re.sub(r'#\S+', '', s)
+    # remove links; \S+ stops at whitespace (the old [^s]+ stopped at the letter "s")
+    s = re.sub(r'https?://\S+', '', s)
+    return s
+
+
+if __name__ == '__main__':
+    # Load all tweets, clean each tweet's text in place, then save the result.
+    with open('all.json') as source:
+        tweets = json.load(source)
+    with progressbar.ProgressBar(max_value=len(tweets)) as progress:
+        for position, record in enumerate(tweets):
+            record['text'] = filter_text(record['text'])
+            progress.update(position)
+    with open('all_text_filtered.json', 'w') as target:
+        json.dump(tweets, target)