Skip to content

Commit

Permalink
Merge pull request #10 from BritishGeologicalSurvey/sqlite-helper
Browse files Browse the repository at this point in the history
Add SQLite helper
  • Loading branch information
dvalters authored Oct 14, 2019
2 parents 76dd2a1 + 764bc16 commit bd54a57
Show file tree
Hide file tree
Showing 10 changed files with 305 additions and 104 deletions.
42 changes: 27 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

> etlhelper is a Python library to simplify data transfer between databases.
`etlhelper` provides a unified way to connect to different database types (currently Oracle, PostgreSQL and SQL Server).
`etlhelper` provides a unified way to connect to different database types (currently Oracle, PostgreSQL, SQLite and SQL Server).
It is a thin wrapper around Python's [DBAPI2](https://www.python.org/dev/peps/pep-0249/) specification.
The `get_rows` function returns the result of a SQL query and can be used to create simple HTTP APIs.
The `copy_rows` function transfers data from one database to another.
Expand Down Expand Up @@ -37,6 +37,7 @@ Required database drivers are specified in the square brackets. Options are:
```

Multiple values can be separated by commas, e.g.: `[oracle,mssql]` would install both sets of drivers.
The `sqlite3` driver is included within Python's Standard Library.


### Dependencies
Expand Down Expand Up @@ -99,6 +100,8 @@ import os
os.environ['ORACLE_PASSWORD'] = 'some-secret-password'
```

No password is required for SQLite databases.


#### DbParams

Expand All @@ -107,27 +110,36 @@ Database connection information is defined by `DbParams` objects.
```
from etlhelper import DbParams
ORACLEDB = DbParams(host="localhost", port=1521,
database="mydata",
user="oracle_user")
ORACLEDB = DbParams(dbtype='ORACLE', host="localhost", port=1521,
database="mydata", username="oracle_user")
POSTGRESDB = DbParams(dbtype='PG', host="localhost", port=5432,
database="mydata", username="postgres_user")
SQLITEDB = DbParams(dbtype='SQLITE', filename='/path/to/file.db')
MSSQLDB = DbParams(dbtype='MSSQL', host="localhost", port=1433,
database="mydata", username="mssql_user",
odbc_driver="ODBC Driver 17 for SQL Server")
```

DbParams objects can also be created from environment variables using the
`from_environment()` function.


#### Get rows

Connections are created by the `connect` function.
The `get_rows` function returns a list of named tuples containing data as
native Python objects.

```python
from my_databases import ORADOCKER
from my_databases import ORACLEDB
from etlhelper import connect, get_rows

sql = "SELECT * FROM src"

with connect(ORADOCKER, "ORA_PASSWORD") as conn:
with connect(ORACLEDB, "ORA_PASSWORD") as conn:
result = get_rows(sql, conn)
```

Expand Down Expand Up @@ -157,15 +169,15 @@ to an INSERT query.
The source and destination tables must already exist.

```python
from my_databases import PGDOCKER, ORADOCKER
from my_databases import POSTGRESDB, ORACLEDB
from etlhelper import connect, copy_rows

select_sql = "SELECT id, name FROM src"
insert_sql = """INSERT INTO dest (id, name)
                VALUES (%s, %s)"""

src_conn = connect(ORADOCKER, "ORA_PASSWORD")
dest_conn = connect(PGDOCKER, "PG_PASSWORD")
src_conn = connect(ORACLEDB, "ORA_PASSWORD")
dest_conn = connect(POSTGRESDB, "PG_PASSWORD")

copy_rows(select_sql, src_conn, insert_sql, dest_conn)
```
Expand Down Expand Up @@ -236,16 +248,16 @@ Transform functions can manipulate geometries using the [Shapely](https://pypi.o
The following is an example ETL script.

```python
from my_databases import ORADOCKER, PGDOCKER
from my_databases import ORACLEDB, POSTGRESDB
from etlhelper import connect, copy_rows, execute

DELETE_SQL = "..."
SELECT_SQL = "..."
INSERT_SQL = "..."

def copy_src_to_dest():
with connect(ORADOCKER, "ORA_PASSWORD") as src_conn:
with connect(PGDOCKER, "PG_PASSWORD") as dest_conn:
with connect(ORACLEDB, "ORA_PASSWORD") as src_conn:
with connect(POSTGRESDB, "PG_PASSWORD") as dest_conn:
execute(DELETE_SQL, dest_conn)
copy_rows(SELECT_SQL, src_conn,
INSERT_SQL, dest_conn)
Expand All @@ -270,11 +282,11 @@ It can be installed separately with `pip install sqlalchemy`.
For example, to export a CSV file of data:

```python
from my_databases import ORADOCKER
from my_databases import ORACLEDB
from etlhelper import get_sqlalchemy_connection_string
from sqlalchemy import create_engine

sqla_conn_str = get_sqlalchemy_connection_string(ORADOCKER, "ORACLE_PASSWORD")
sqla_conn_str = get_sqlalchemy_connection_string(ORACLEDB, "ORACLE_PASSWORD")
engine = create_engine(sqla_conn_str)

sql = "SELECT * FROM my_table"
Expand All @@ -292,7 +304,7 @@ For example, to return each row as a dictionary, use the following:
from etlhelper import connect, iter_rows
from etlhelper.row_factories import dict_rowfactory

conn = connect(ORADOCKER, 'ORACLE_PASSWORD')
conn = connect(ORACLEDB, 'ORACLE_PASSWORD')
sql = "SELECT * FROM my_table"
for row in iter_rows(sql, conn, row_factory=dict_rowfactory):
print(row['id'])
Expand Down
7 changes: 7 additions & 0 deletions bin/run_tests_for_developer.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
#! /bin/sh
echo "Building container"
docker build \
--build-arg INSTANT_CLIENT_ZIP=${INSTANT_CLIENT_ZIP} \
-t etlhelper-test-runner . || exit 1

echo "Flake8 checks"
docker run \
etlhelper-test-runner flake8 etlhelper test || exit 1

echo "Unit and integration tests"
docker run \
-e TEST_PG_PASSWORD="${TEST_PG_PASSWORD}" \
-e TEST_ORACLE_DBTYPE="${TEST_ORACLE_DBTYPE}" \
Expand Down
2 changes: 1 addition & 1 deletion etlhelper/connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from etlhelper.db_helper_factory import DB_HELPER_FACTORY


def connect(db_params, password_variable, **kwargs):
def connect(db_params, password_variable=None, **kwargs):
"""
Return database connection.
Expand Down
3 changes: 3 additions & 0 deletions etlhelper/db_helper_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from etlhelper.db_helpers.oracle import OracleDbHelper
from etlhelper.db_helpers.postgres import PostgresDbHelper
from etlhelper.db_helpers.mssql import SqlServerDbHelper
from etlhelper.db_helpers.sqlite import SQLiteDbHelper
from etlhelper.exceptions import ETLHelperHelperError


Expand Down Expand Up @@ -69,3 +70,5 @@ def from_dbtype(self, dbtype):
PostgresDbHelper)
DB_HELPER_FACTORY.register_helper('MSSQL', "<class 'pyodbc.Connection'>",
SqlServerDbHelper)
DB_HELPER_FACTORY.register_helper('SQLITE', "<class 'sqlite3.Connection'>",
SQLiteDbHelper)
1 change: 1 addition & 0 deletions etlhelper/db_helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from etlhelper.db_helpers.oracle import OracleDbHelper
from etlhelper.db_helpers.postgres import PostgresDbHelper
from etlhelper.db_helpers.mssql import SqlServerDbHelper
from etlhelper.db_helpers.sqlite import SQLiteDbHelper
13 changes: 11 additions & 2 deletions etlhelper/db_helpers/db_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self):
# Throws exception if not overridden
self._connect_func = lambda conn_str: 1/0

def connect(self, db_params, password_variable, **kwargs):
def connect(self, db_params, password_variable=None, **kwargs):
"""
Return a connection (as appropriate), configured for
the database with the password obtained from environment variable. These
Expand Down Expand Up @@ -80,10 +80,19 @@ def get_connection_string(db_params, password_variable):
@staticmethod
def executemany(cursor, query, chunk):
"""
Call executemany method appropriate to database.
Call executemany method appropriate to database. Overridden for PostgreSQL.
:param cursor: Open database cursor.
:param query: str, SQL query
:param chunk: list, Rows of parameters.
"""
cursor.executemany(query, chunk)

@staticmethod
def cursor(conn):
"""
Return a cursor on the connection. Overridded for SQLite.
:param conn: Open database connection.
"""
return conn.cursor()
55 changes: 55 additions & 0 deletions etlhelper/db_helpers/sqlite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
Database helper for SQLite
"""
from contextlib import contextmanager
from etlhelper.db_helpers.db_helper import DbHelper


class SQLiteDbHelper(DbHelper):
    """
    SQLite DB helper class.

    Configures the generic DbHelper for Python's sqlite3 driver. The only
    required connection parameter is ``filename``.
    """
    def __init__(self):
        super().__init__()
        # Only the import itself can raise ImportError, so keep the try
        # body down to that single statement.
        try:
            import sqlite3
        except ImportError:
            print("The sqlite3 module was not found. Check configuration as "
                  "it should be in Python's standard library.")
        else:
            self.sql_exceptions = (sqlite3.OperationalError,
                                   sqlite3.IntegrityError)
            self._connect_func = sqlite3.connect
            # Trailing comma makes this a real tuple, matching sql_exceptions
            self.connect_exceptions = (sqlite3.OperationalError,)
            self.required_params = {'filename'}

    def get_connection_string(self, db_params, password_variable=None):
        """
        Return a connection string, which for SQLite is the database
        filename.

        :param db_params: DbParams
        :param password_variable: unused, accepted for consistency with
                                  other databases
        :return: str
        """
        return f'{db_params.filename}'

    def get_sqlalchemy_connection_string(self, db_params,
                                         password_variable=None):
        """
        Returns connection string for SQLAlchemy type connections.

        :param db_params: DbParams
        :param password_variable: unused, accepted for consistency with
                                  other databases
        :return: str
        """
        return f'sqlite:///{db_params.filename}'

    @staticmethod
    @contextmanager
    def cursor(conn):
        """
        Return a cursor on current connection. This implementation allows
        SQLite cursor to be used as context manager as with other db types.

        :param conn: Open database connection.
        """
        # Create the cursor before entering the try block: if conn.cursor()
        # itself fails there is nothing to close, and the original error
        # must not be masked by an UnboundLocalError raised in `finally`.
        cursor = conn.cursor()
        try:
            yield cursor
        finally:
            cursor.close()
6 changes: 3 additions & 3 deletions etlhelper/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def iter_chunks(select_query, conn, parameters=(),
:param read_lob: bool, convert Oracle LOB objects to strings
"""
helper = DB_HELPER_FACTORY.from_conn(conn)
with conn.cursor() as cursor:
with helper.cursor(conn) as cursor:
# Run query
try:
cursor.execute(select_query, parameters)
Expand Down Expand Up @@ -170,7 +170,7 @@ def executemany(query, rows, conn, commit_chunks=True):
helper = DB_HELPER_FACTORY.from_conn(conn)
processed = 0

with conn.cursor() as cursor:
with helper.cursor(conn) as cursor:
for chunk in _chunker(rows, CHUNKSIZE):
# Run query
try:
Expand Down Expand Up @@ -239,7 +239,7 @@ def execute(query, conn, parameters=()):
:param parameters: sequence or dict of bind variables to insert in the query
"""
helper = DB_HELPER_FACTORY.from_conn(conn)
with conn.cursor() as cursor:
with helper.cursor(conn) as cursor:
# Run query
try:
cursor.execute(query, parameters)
Expand Down
Loading

0 comments on commit bd54a57

Please sign in to comment.