Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Рефакторинг заливки enum'ов в sqlite-базу #87

Open
ShashkovS opened this issue Jun 7, 2023 · 0 comments
Open

Comments

@ShashkovS
Copy link
Owner

Можно добавить sqlite-адаптеры и конверторы, чтобы не конвертить ENUM'ы в строки и числа и назад, а заодно не портить типизацию.

См. пример:

from enum import Enum, IntEnum, unique, EnumMeta
from operator import attrgetter
from time import perf_counter_ns
from random import randrange
import sqlite3

# Сколько раз дублируем все записи
INSERTS = 1000000
FETCHES = 1000000


@unique
class INT_ENUM(IntEnum):
    ONE = 1
    TWO = 2
    ELEVEL = 11


@unique
class STR_ENUM(Enum):
    SHORT = 'S'
    LONG = 'LONG'
    ANOTHER = 'a'


@unique
class MIXED_ENUM(Enum):
    FIVE = 5
    FORTY_TWO_AS_STR = '42'


def prepare_connection(database: str) -> sqlite3.Connection:
    """Создаём connection, настраиваем адаптеры"""
    # Регистрируем адаптеры и конверторы для всех enum'ом. Можно явно указать список
    all_enums = [globals()[x] for x in globals() if isinstance(globals()[x], EnumMeta)]
    for enum in all_enums:
        # Преобразуем enum в строку, используя value
        sqlite3.register_adapter(enum, attrgetter('value'))

        # Converter functions are always passed a bytes object, no matter the underlying SQLite data type.
        # Преобразуем из байтов в ENUM используя замыкание словаря с ключами
        # Если вы меняете enum на лету, то вам понадобится страдать
        def decoder_factory():
            decoder_dict = {str(x.value).encode('utf8'): x for x in enum}

            def decoder_function(value: bytes):
                return decoder_dict[value]

            return decoder_function

        sqlite3.register_converter(enum.__name__, decoder_factory())
    conn = sqlite3.connect(database, detect_types=sqlite3.PARSE_DECLTYPES)
    conn.row_factory = sqlite3.Row
    return conn


def prepare_table(conn: sqlite3.Connection):
    """Создаём временную таблицу с типами-ENUM'ами"""
    with conn:
        conn.execute('''
            drop table if exists data;
        ''')
        conn.execute('''      PRAGMA journal_mode = OFF;       ''')
        conn.execute('''      PRAGMA synchronous = 0;       ''')
        conn.execute('''      PRAGMA cache_size = 1000000;       ''')
        conn.execute('''      PRAGMA locking_mode = EXCLUSIVE;       ''')
        conn.execute('''      PRAGMA temp_store = MEMORY;       ''')

        conn.execute('''
            create table data (
                id         INTEGER PRIMARY KEY,
                int_enum   INT_ENUM null,
                str_enum   STR_ENUM null,
                mixed_enum MIXED_ENUM null,
                text       TEXT     not null 
            ); -- STRICT
        ''')


def insert_row(conn: sqlite3.Connection, int_enum: INT_ENUM, str_enum: STR_ENUM, mixed_enum: MIXED_ENUM, text: str):
    """Заливаем данные. Обратите внимание, типы работают! """
    with conn:
        conn.execute('''
            insert into data ( int_enum,  str_enum,  mixed_enum,  text)
            values           (:int_enum, :str_enum, :mixed_enum, :text)
        ''', locals())


def get_row_by_enums(conn: sqlite3.Connection, int_enum: INT_ENUM, str_enum: STR_ENUM, mixed_enum: MIXED_ENUM) -> sqlite3.Row:
    return conn.execute('''
        select * from data
        where 
        (:int_enum is null or int_enum = :int_enum) 
        and (:str_enum is null or str_enum = :str_enum)
        and (:mixed_enum is null or mixed_enum = :mixed_enum)
        limit 1
    ''', locals()).fetchone()


def get_row_by_id(conn: sqlite3.Connection, id: int) -> sqlite3.Row:
    return conn.execute('''
        select * from data
        where id = :id
    ''', locals()).fetchone()


def insert_dummy_rows(conn: sqlite3.Connection) -> int:
    inserted_rows = 0
    while inserted_rows < INSERTS:
        insert_row(conn, None, None, None, 'all_nulls')
        inserted_rows += 1
        for int_enum in INT_ENUM:
            for str_enum in STR_ENUM:
                for mixed_enum in MIXED_ENUM:
                    text = f'random stuff'
                    insert_row(conn, int_enum, str_enum, mixed_enum, text)
                    inserted_rows += 1
    return inserted_rows


conn = prepare_connection(':memory:')
# conn = prepare_connection('S:\\temp_yes.db')
prepare_table(conn)
insert_dummy_rows(conn)

# Теперь эксперементируем.
# Доступ по ENUM'у
row = get_row_by_enums(conn, INT_ENUM.ONE, STR_ENUM.ANOTHER, MIXED_ENUM.FORTY_TWO_AS_STR)
print(dict(row))
assert row['mixed_enum'] is MIXED_ENUM.FORTY_TWO_AS_STR
# Доступ по id
row = get_row_by_id(conn, 123)
print(dict(row))

# Теперь performance-тесты
# Добавление построчное
st = perf_counter_ns()
inserted_rows = insert_dummy_rows(conn)
en = perf_counter_ns()
print(f'{inserted_rows} rows inserted in {(en - st) / 1e6:0.2f}ms, {(en - st) / inserted_rows / 1e3:0.2f}𝜇s per row')

# Случайны доступ
ids_to_fetch = [randrange(0, inserted_rows*2) for __ in range(FETCHES)]
st = perf_counter_ns()
for id in ids_to_fetch:
    row = get_row_by_id(conn, id)
en = perf_counter_ns()
print(f'{len(ids_to_fetch)} rows fetched in {(en - st) / 1e6:0.2f}ms, {(en - st) / len(ids_to_fetch) / 1e3:0.2f}𝜇s per row')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant