From 02a4c8ca967a56db7139e2a2213740031220d60a Mon Sep 17 00:00:00 2001 From: peterx Date: Wed, 3 Aug 2016 16:00:00 -0500 Subject: [PATCH 1/3] fix requirements, app_config --- geocoder/app_config.py.example | 2 +- loadAddresses.py | 8 +++++++- requirements.txt | 8 +++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/geocoder/app_config.py.example b/geocoder/app_config.py.example index d203a88..f07ce9f 100644 --- a/geocoder/app_config.py.example +++ b/geocoder/app_config.py.example @@ -6,7 +6,7 @@ DB_USER = 'datamade' DB_PW = '' DB_HOST = 'localhost' DB_PORT = '5432' -DB_NAME = 'sunshine' +DB_NAME = 'geocoder' DB_CONN='postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'\ .format(DB_USER, DB_PW, DB_HOST, DB_PORT, DB_NAME) diff --git a/loadAddresses.py b/loadAddresses.py index 78a0542..1fc066c 100644 --- a/loadAddresses.py +++ b/loadAddresses.py @@ -194,7 +194,13 @@ class SuburbsETL(CookCountyETL): cook_county_data_portal = 'https://datacatalog.cookcountyil.gov/api/geospatial/%s?method=export&format=Original' if args.load_data: - engine = create_engine('postgresql://localhost:5432/geocoder') + from geocoder.app_config import DB_USER, DB_PW, DB_HOST, \ + DB_PORT, DB_NAME + if DB_PW != '': + engine = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'\ + .format(DB_USER, DB_PW, DB_HOST, DB_PORT, DB_NAME)) + else: + engine = create_engine('postgresql:///' + DB_NAME) connection = engine.connect() chicago = ChicagoETL(connection, 'chicago_addresses') diff --git a/requirements.txt b/requirements.txt index 066d926..12360d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,9 @@ -numpy==1.9.2 -dedupe==0.8.0.1.7 -dedupe-variable-address==0.0.4 +dedupe==1.4.15 +dedupe-variable-address==0.0.7 Flask==0.10.1 psycopg2==2.6.1 csvkit==0.9.1 requests==2.7.0 +simpledbf==0.2.6 +pytz==2016.6.1 +numpy==1.11.1 From 90c5880f48d2b2f6b8c502fe76f08859e19c1fbc Mon Sep 17 00:00:00 2001 From: peterx Date: Fri, 5 Aug 2016 09:14:33 -0500 Subject: [PATCH 2/3] db_conn 
fixes --- geocoder/app_config.py.example | 3 +++ loadAddresses.py | 14 +++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/geocoder/app_config.py.example b/geocoder/app_config.py.example index f07ce9f..41499ee 100644 --- a/geocoder/app_config.py.example +++ b/geocoder/app_config.py.example @@ -11,6 +11,9 @@ DB_NAME = 'geocoder' DB_CONN='postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'\ .format(DB_USER, DB_PW, DB_HOST, DB_PORT, DB_NAME) +# if no password, the conn_string should be changed to just +# 'postgresql+psycopg2:///{0}'.format(DB_NAME) + SECRET_KEY = 'super secret key' # See: https://pythonhosted.org/Flask-Cache/#configuring-flask-cache diff --git a/loadAddresses.py b/loadAddresses.py index 1fc066c..ee6b134 100644 --- a/loadAddresses.py +++ b/loadAddresses.py @@ -5,6 +5,7 @@ import os import sqlalchemy as sa from geocoder.data_loader import ETLThing +from geocoder.app_config import DB_CONN class CookCountyETL(ETLThing): @@ -194,13 +195,7 @@ class SuburbsETL(CookCountyETL): cook_county_data_portal = 'https://datacatalog.cookcountyil.gov/api/geospatial/%s?method=export&format=Original' if args.load_data: - from geocoder.app_config import DB_USER, DB_PW, DB_HOST, \ - DB_PORT, DB_NAME - if DB_PW != '': - engine = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'\ - .format(DB_USER, DB_PW, DB_HOST, DB_PORT, DB_NAME)) - else: - engine = create_engine('postgresql:///' + DB_NAME) + engine = create_engine(DB_CONN) connection = engine.connect() chicago = ChicagoETL(connection, 'chicago_addresses') @@ -229,7 +224,8 @@ class SuburbsETL(CookCountyETL): import simplejson as json import dedupe - engine = create_engine('postgresql://localhost:5432/geocoder') + from geocoder.app_config import DB_CONN + engine = create_engine(DB_CONN) deduper = DatabaseGazetteer([{'field': 'complete_address', 'type': 'Address'}], engine=engine) @@ -261,7 +257,7 @@ class SuburbsETL(CookCountyETL): if args.block: from geocoder.deduper import StaticDatabaseGazetteer - 
engine = create_engine('postgresql://localhost:5432/geocoder') + engine = create_engine(DB_CONN) with open('geocoder/data/dedupe.settings', 'rb') as sf: deduper = StaticDatabaseGazetteer(sf, engine=engine) From 3df85bb300d8c3905ed3e4d7580e3e96e517a9f0 Mon Sep 17 00:00:00 2001 From: peterx Date: Mon, 8 Aug 2016 09:57:03 -0500 Subject: [PATCH 3/3] partial fix of queries --- linkNewData.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/linkNewData.py b/linkNewData.py index aa7ce12..bc3b708 100644 --- a/linkNewData.py +++ b/linkNewData.py @@ -1,6 +1,6 @@ import os import sqlalchemy as sa - +from geocoder.app_config import DB_CONN def checkForTable(engine, table_name): try: @@ -22,7 +22,7 @@ def trainIncoming(name): import simplejson as json import dedupe - engine = create_engine('postgresql://localhost:5432/geocoder') + engine = create_engine(DB_CONN) deduper = DatabaseGazetteer([{'field': 'complete_address', 'type': 'Address'}], engine=engine) @@ -71,7 +71,7 @@ def trainIncoming(name): def blockIncoming(name, train): from geocoder.deduper import StaticDatabaseGazetteer - engine = create_engine('postgresql://localhost:5432/geocoder') + engine = create_engine(DB_CONN) with open('geocoder/data/dedupe.settings', 'rb') as sf: deduper = StaticDatabaseGazetteer(sf, engine=engine) @@ -170,7 +170,7 @@ def blockIncoming(name, train): if args.link: from geocoder.deduper import AddressLinkGazetteer - engine = create_engine('postgresql://localhost:5432/geocoder') + engine = create_engine(DB_CONN) sql_table = checkForTable(engine, args.name)