merging pandas release to main branch

Bookworm-project · Oct 3, 2014 · 0e6e93e · 0e6e93e
2 parents 536176a + b4629a9
commit 0e6e93e
Show file tree

Hide file tree

Showing 14 changed files with 2,581 additions and 232 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,5 +7,5 @@ APIkeys
 *.cgi
 migration.py
 shipping.py
-bookworm
 genderizer*
+*.pyc
diff --git a/README.md b/README.md
@@ -6,21 +6,17 @@ They are used with the [Bookworm GUI](https://github.com/econpy/BookwormGUI) and
 For a more interactive explanation of how the GUI works, see the [D3 bookworm browser](http://github.com/bmschmidt/Presidio)
 
 ### General Description
-`dbbindings.py` is called as a CGI script and uses `APIimplementation.py` to construct a database query and return the data to the web client.
+`dbbindings.py` is called as a CGI script and uses a number of modules in `bookworm` to construct a database query and return the data to the web client.
 
 
 ### Installation
 
-On Ubuntu, you can simply run the `Makefile` to install dependencies for the API software.
-
-```bash
-sudo make ubuntu-install
-```
-
-However, you may wish to open the Makefile and see what is going on for yourself. The idea is to move the 3 python scripts to your cgi-bin, make them executable, and make the Apache user the owner of the files.
+Just put all the files in this repo into your core CGI-script folder. Hopefully we'll streamline this process a little bit soon.
 
 ### Usage
 
 If the bookworm is located on your server, there is no need to do anything--it should be drag-and-drop. (Although on anything but Debian, settings might require a small amount of tweaking.)
-If you want to have the webserver and database server on different machines, that needs to be specified in the knownhosts.py.
+If you want to have the webserver and database server on different machines, that needs to be specified in the MySQL configuration file that this code reads.
+If you want to use multiple MySQL servers, you may need to get fancy.
 This tells the API where to look for the data for a particular bookworm. The benefit of this setup is that you can have your webserver on one server and the database on another server.
+
diff --git a/bookworm/#APIimplementation.py# b/bookworm/#APIimplementation.py#
diff --git a/bookworm/.gitignore b/bookworm/.gitignore
@@ -0,0 +1,5 @@
+old/*
+*~
+APIkeys
+#*
+.#*
diff --git a/bookworm/APIimplementation.py b/bookworm/APIimplementation.py
diff --git a/bookworm/MetaWorm.py b/bookworm/MetaWorm.py
@@ -0,0 +1,121 @@
+import pandas
+import json
+import copy
+import threading
+import time
+from collections import defaultdict
+
+def hostlist(dblist):
+    """
+    Return one host name per entry in dblist.
+
+    This could do something fancier, but for now every database is
+    looked up on localhost by default.
+    """
+    return ["localhost" for _ in range(len(dblist))]
+
+class childQuery(threading.Thread):
+    """
+    Thread wrapping a single query against one bookworm API host.
+
+    dictJSON is the query as a JSON-serializable dict; host is the address
+    of the server the query is meant for. The query is stored serialized
+    in self.dict, and the raw response (once fetched) in self.data.
+    """
+    def __init__(self,dictJSON,host):
+        #super() has to name this class for the Thread set-up to run.
+        super(childQuery, self).__init__()
+        #Serialize the query that was passed in, not the builtin dict type.
+        self.dict = json.dumps(dictJSON)
+        self.host = host
+        #Raw response text; stays None until a web query fills it in.
+        self.data = None
+
+    def runQuery(self):
+        """Build the request URL for this query and keep it on self.url."""
+        #TODO: make the web query itself and assign the response to self.data
+        self.url = self.host + "/cgi-bin/bookwormAPI?query=" + self.dict
+
+    def parseResults(self):
+        """Return the decoded JSON response, or None if nothing was fetched."""
+        if self.data is None:
+            return None
+        return json.loads(self.data)
+
+    def run(self):
+        """Thread entry point: runs the query."""
+        self.runQuery()
+
+def flatten(dictOfdicts):
+    """
+    Recursive function: transforms a dict with nested entries like
+    foo["a"]["b"]["c"] = 3
+    into a list of [keytuple, value] pairs like
+    [("a","b","c"), 3]
+
+    An empty dict (at any depth) contributes no pairs.
+    """
+    output = []
+    for (key,value) in dictOfdicts.items():
+        if isinstance(value,dict):
+            #Nested dict: recurse, then prefix each child's key path with this key.
+            for child in flatten(value):
+                output.append([(key,) + child[0],child[1]])
+        else:
+            #Leaf value: the key path is a one-element tuple.
+            output.append([(key,),value])
+    return output
+
+def animate(dictOfTuples):
+    """
+    opposite of flatten: transforms tuple-keyed entries like
+    fooPrime[("a","b","c")] = 3
+    back into nested dicts like
+    foo["a"]["b"]["c"] = 3
+
+    Accepts either a dict keyed by tuples or an iterable of
+    [keytuple, value] pairs (the form flatten returns).
+    Key tuples must be non-empty; an empty one raises IndexError.
+    """
+    if isinstance(dictOfTuples,dict):
+        pairs = dictOfTuples.items()
+    else:
+        pairs = dictOfTuples
+
+    output = dict()
+    for (keysequence,value) in pairs:
+        #Walk (creating as needed) down to the dict that holds the leaf.
+        node = output
+        for key in keysequence[:-1]:
+            node = node.setdefault(key,dict())
+        node[keysequence[-1]] = value
+    return output
+
+
+
+def combineDicts(master,new):
+    """
+    Fold the nested dict `new` into `master` and return `master`.
+
+    instead of a dict of dicts of arbitrary depth, use a dict of tuples to store:
+    each key of master is a key path as produced by flatten(new).
+    When a key path is already present, the old and new value sequences
+    are added element by element (zip stops at the shorter of the two);
+    otherwise the new value sequence is stored as-is.
+    NOTE(review): assumes leaf values are sequences of numbers -- confirm.
+    """
+
+    for (keysequence, valuesequence) in flatten(new):
+        try:
+            existing = master[keysequence]
+        except KeyError:
+            master[keysequence] = valuesequence
+        else:
+            #A real list (not a lazy map object) so it can be summed again later.
+            master[keysequence] = [sum(pair) for pair in zip(existing,valuesequence)]
+    return master
+
+class MetaQuery(object):
+    """
+    Fans one query out over several databases and/or hosts, building one
+    childQuery per (host, database) pair and merging their results.
+    """
+    def __init__(self,dictJSON):
+        #Accept either an already-parsed dict or its JSON text. Work on a
+        #copy, since setDefaults rewrites the "database" and "host" entries.
+        if isinstance(dictJSON,dict):
+            self.outside_dictionary = copy.deepcopy(dictJSON)
+        else:
+            self.outside_dictionary = json.loads(dictJSON)
+
+    def setDefaults(self):
+        """
+        Normalize "database" and "host" into lists of equal length.
+
+        Raises KeyError if the query names no database.
+        """
+        try:
+            string_types = basestring
+        except NameError:
+            #Python 3 has no basestring.
+            string_types = str
+
+        if "database" not in self.outside_dictionary:
+            raise KeyError("database")
+
+        for specialKey in ["database","host"]:
+            #It's OK not to define host: a missing key is simply left alone here.
+            value = self.outside_dictionary.get(specialKey)
+            if isinstance(value,string_types):
+                #coerce strings to list:
+                self.outside_dictionary[specialKey] = [value]
+
+        if 'host' not in self.outside_dictionary:
+            #Build a hostlist: usually just localhost a bunch of times.
+            self.outside_dictionary['host']  = hostlist(self.outside_dictionary['database'])
+
+        for (target, dest) in [("database","host"),("host","database")]:
+            #Expand out so you can search for the same database on multiple hosts, or multiple databases on the same host.
+            if len(self.outside_dictionary[target])==1 and len(self.outside_dictionary[dest]) != 1:
+                self.outside_dictionary[target] = self.outside_dictionary[target] * len(self.outside_dictionary[dest])
+
+
+    def buildChildren(self):
+        """Create one childQuery per (host, database) pair in self.children."""
+        desiredCounts = []
+        for (host,dbname) in zip(self.outside_dictionary["host"],self.outside_dictionary["database"]):
+            query = copy.deepcopy(self.outside_dictionary)
+            #The child gets a single database and no host list of its own.
+            del(query['host'])
+            query['database'] = dbname
+
+            desiredCounts.append(childQuery(query,host))
+        self.children = desiredCounts
+
+    def runChildren(self):
+        """Start every child thread."""
+        for child in self.children:
+            child.start()
+
+    def combineChildren(self):
+        """
+        Wait for every child to finish, merge their parsed results with
+        combineDicts, and return the merged dict.
+        """
+        complete = dict()
+        for child in self.children:
+            #Each child is merged exactly once, after it has finished.
+            #A thread that is not alive has either finished or never started;
+            #join() is only legal (and only needed) on a running one.
+            if child.is_alive():
+                child.join()
+            result = child.parseResults()
+            if result is not None:
+                complete = combineDicts(complete,result)
+        return complete
+
+    def return_json(self):
+        #Not implemented yet.
+        pass
+
+
+
-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
+    old/*
+    *~
+    APIkeys
+    #*
+    .#*