From 7ec3b1e7b23f4baaecd995993766adc5d66c2be4 Mon Sep 17 00:00:00 2001 From: Harlan Lieberman-Berg Date: Wed, 1 Nov 2023 00:22:06 -0400 Subject: [PATCH 1/5] OD-1731: Remove unused imports --- efiction/chapters.py | 2 +- efiction/metadata.py | 1 - efiction/original.py | 1 - efiction/simplified.py | 1 - efiction/tests/test_chapters.py | 4 +--- efiction/tests/test_metadata.py | 6 +----- efiction/tests/test_original.py | 3 --- opendoors/utils.py | 3 +-- steps/tests/test_step_02.py | 3 +-- 9 files changed, 5 insertions(+), 19 deletions(-) diff --git a/efiction/chapters.py b/efiction/chapters.py index c93ae8a..8645977 100644 --- a/efiction/chapters.py +++ b/efiction/chapters.py @@ -96,7 +96,7 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]): # check if encoding is valid ''.encode(encoding_text) encoding = encoding_text - except: + except LookupError: print(f"{encoding_text} is not a valid encoding, try again") for old_chapter in old_chapters: chapid = old_chapter['chapid'] diff --git a/efiction/metadata.py b/efiction/metadata.py index 7b323d5..b9c4521 100644 --- a/efiction/metadata.py +++ b/efiction/metadata.py @@ -1,4 +1,3 @@ -import os import re from configparser import ConfigParser from logging import Logger diff --git a/efiction/original.py b/efiction/original.py index 6861908..d2fff47 100644 --- a/efiction/original.py +++ b/efiction/original.py @@ -1,7 +1,6 @@ """ Step 01 """ -import os from configparser import ConfigParser from logging import Logger diff --git a/efiction/simplified.py b/efiction/simplified.py index b979797..d3b0d53 100644 --- a/efiction/simplified.py +++ b/efiction/simplified.py @@ -1,4 +1,3 @@ -import os import re from configparser import ConfigParser from logging import Logger diff --git a/efiction/tests/test_chapters.py b/efiction/tests/test_chapters.py index 9deabf5..48cd202 100644 --- a/efiction/tests/test_chapters.py +++ b/efiction/tests/test_chapters.py @@ -5,10 +5,8 @@ from efiction.chapters import EFictionChapters -from opendoors.big_insert import BigInsert from opendoors.config import ArchiveConfig -from opendoors.mysql import SqlDb -from opendoors.utils import get_full_path, normalize, remove_output_files +from opendoors.utils import get_full_path, normalize def get_data(): diff --git a/efiction/tests/test_metadata.py b/efiction/tests/test_metadata.py index 26c87a6..d6e7f30 100644 --- a/efiction/tests/test_metadata.py +++ b/efiction/tests/test_metadata.py @@ -1,12 +1,8 @@ import datetime from unittest import TestCase -from unittest.mock import MagicMock -from efiction.metadata import EFictionMetadata from efiction.tests.test_utils import load_fixtures, create_efiction_converter -from opendoors.config import ArchiveConfig -from opendoors.mysql import SqlDb -from opendoors.utils import get_full_path, remove_output_files +from opendoors.utils import remove_output_files class TestEFictionConverter(TestCase): diff --git a/efiction/tests/test_original.py b/efiction/tests/test_original.py index f1a3f6a..761228b 100644 --- a/efiction/tests/test_original.py +++ b/efiction/tests/test_original.py @@ -1,6 +1,3 @@ -import glob -import os -import re from unittest import TestCase from unittest.mock import MagicMock, patch diff --git a/opendoors/utils.py b/opendoors/utils.py index 8fdf637..d569fe6 100644 --- a/opendoors/utils.py +++ b/opendoors/utils.py @@ -6,7 +6,6 @@ import os import re import shutil -import sys from typing import Mapping from pathlib import Path @@ -161,4 +160,4 @@ def get_prefixed_path(step: str, path: str, filename: str=""): if filename: 
return os.path.join(path, f"{prefix}-{filename}") else: - return os.path.join(path, prefix) \ No newline at end of file + return os.path.join(path, prefix) diff --git a/steps/tests/test_step_02.py b/steps/tests/test_step_02.py index eae9e4b..76e7470 100644 --- a/steps/tests/test_step_02.py +++ b/steps/tests/test_step_02.py @@ -4,11 +4,10 @@ import shutil from pathlib import Path from unittest import TestCase -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock from opendoors.config import ArchiveConfig from opendoors.step_base import StepInfo -from steps.step_01 import Step01 from steps.step_02 import Step02 test_logger = MagicMock() From 94aa44ea57c55836875673f31e15f5b3151d8c16 Mon Sep 17 00:00:00 2001 From: Harlan Lieberman-Berg Date: Wed, 1 Nov 2023 00:23:54 -0400 Subject: [PATCH 2/5] OD-1731: Fix f-strings without placeholders --- efiction/metadata.py | 6 +++--- efiction/tag_converter.py | 2 +- opendoors/progress.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/efiction/metadata.py b/efiction/metadata.py index b9c4521..58ccdbd 100644 --- a/efiction/metadata.py +++ b/efiction/metadata.py @@ -151,7 +151,7 @@ def _convert_story_tags(self, old_story): def _convert_tags_join(self, new_story, tags, sql=None): # Support using non-default sql connection for multithreaded workloads sql = self.sql if sql is None else sql - full_query = f"INSERT INTO item_tags (item_id, item_type, tag_id) VALUES " + full_query = "INSERT INTO item_tags (item_id, item_type, tag_id) VALUES " tag_query = [] for tag_list in tags.values(): for tag in tag_list: @@ -218,12 +218,12 @@ def story_processor(old_story): """ sql.execute(self.working_open_doors, query) - self.logger.debug(f" tags...") + self.logger.debug(" tags...") tags = self._convert_story_tags(old_story) # pass the new sql to be used instead of the main one self._convert_tags_join(new_story, tags, sql) - self.logger.debug(f" authors...") + self.logger.debug(" authors...") self._convert_author_join(new_story, old_story['uid'], sql) # Find if there are any coauthors for the work coauthors = self.fetch_coauthors(new_story, sql) diff --git a/efiction/tag_converter.py b/efiction/tag_converter.py index b265a12..593e478 100644 --- a/efiction/tag_converter.py +++ b/efiction/tag_converter.py @@ -29,7 +29,7 @@ def check_for_nonstandard_tag_tables(self) -> bool: if tag_table_name == 'rating': # Only one rating per story, so story rating should be single number # that exactly matches rating id - query = f"SELECT count(*) as cnt FROM stories WHERE rid NOT IN (SELECT rid FROM ratings);" + query = "SELECT count(*) as cnt FROM stories WHERE rid NOT IN (SELECT rid FROM ratings);" count: List[Dict[str, int]] = self.sql.execute_and_fetchall(self.working_original, query) tag_tables['rating'] = bool(count and count[0]['cnt'] > 0) else: diff --git a/opendoors/progress.py b/opendoors/progress.py index 5146577..c6a5f15 100644 --- a/opendoors/progress.py +++ b/opendoors/progress.py @@ -29,7 +29,7 @@ def continue_from_last(config: ConfigParser, logger: Logger, sql: SqlDb, steps: next_step = config['Processing']['next_step'] = step.next_step update_done_steps(config, done_steps, step_to_run) else: - restart_yn = input(f"All steps have been completed for this archive. Do you want to\n" + restart_yn = input("All steps have been completed for this archive. Do you want to\n" "1. Restart from step 1\n" "2. 
Exit (default - press Enter)\n>> ") if restart_yn == "1": From 564f4530446415c0168e221207daf93f0c18ae16 Mon Sep 17 00:00:00 2001 From: Harlan Lieberman-Berg Date: Wed, 1 Nov 2023 00:24:50 -0400 Subject: [PATCH 3/5] OD-1731: Don't store exceptions if we're not going to use them --- efiction/metadata.py | 2 +- efiction/tag_converter.py | 2 +- opendoors/mysql.py | 2 +- opendoors/progress.py | 2 +- opendoors/utils.py | 2 +- steps/tests/test_step_01.py | 2 +- steps/tests/test_step_02.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/efiction/metadata.py b/efiction/metadata.py index 58ccdbd..ae277d3 100644 --- a/efiction/metadata.py +++ b/efiction/metadata.py @@ -175,7 +175,7 @@ def fetch_coauthors(self, new_story, sql=None): # get a dict of coauthor IDs for the story try: authors = sql.execute_and_fetchall(self.working_original, full_query) - except Exception as e: + except Exception: authors = None self.logger.info("No coauthors table...") # We only try to operate on this result if it is not None diff --git a/efiction/tag_converter.py b/efiction/tag_converter.py index 593e478..e328ce8 100644 --- a/efiction/tag_converter.py +++ b/efiction/tag_converter.py @@ -53,7 +53,7 @@ def check_for_nonstandard_tag_tables(self) -> bool: tags = list(map(lambda story_tags: story_tags[id_name].replace(',', ''), tags)) int(''.join(tags)) tag_tables[tag_table_name] = False - except Exception as e: + except Exception: # Non-integer in identifier tag_tables[tag_table_name] = True except Exception as e: diff --git a/opendoors/mysql.py b/opendoors/mysql.py index 193ef99..ed67c1c 100644 --- a/opendoors/mysql.py +++ b/opendoors/mysql.py @@ -73,7 +73,7 @@ def read_table_to_dict(self, database: str, tablename: str): try: cursor.execute(f"SELECT * FROM {database}.{tablename};") return cursor.fetchall() - except Exception as e: + except Exception: self.logger.info(f"No table {tablename} in {database}...") return [] diff --git a/opendoors/progress.py b/opendoors/progress.py index c6a5f15..15daa08 100644 --- a/opendoors/progress.py +++ b/opendoors/progress.py @@ -36,7 +36,7 @@ def continue_from_last(config: ConfigParser, logger: Logger, sql: SqlDb, steps: next_step = "01" else: run_next = False - except Exception as e: + except Exception: logger.error(traceback.format_exc()) diff --git a/opendoors/utils.py b/opendoors/utils.py index d569fe6..a950f1c 100644 --- a/opendoors/utils.py +++ b/opendoors/utils.py @@ -135,7 +135,7 @@ def remove_output_files(path: str): shutil.rmtree(file) else: os.remove(file) - except PermissionError as pe: + except PermissionError: # We don't necessarily care that much continue diff --git a/steps/tests/test_step_01.py b/steps/tests/test_step_01.py index b59d843..40ea1fb 100644 --- a/steps/tests/test_step_01.py +++ b/steps/tests/test_step_01.py @@ -25,7 +25,7 @@ def tearDown(self) -> None: shutil.rmtree(file) else: os.remove(file) - except PermissionError as pe: + except PermissionError: # We don't necessarily care that much continue diff --git a/steps/tests/test_step_02.py b/steps/tests/test_step_02.py index 76e7470..b51064a 100644 --- a/steps/tests/test_step_02.py +++ b/steps/tests/test_step_02.py @@ -25,7 +25,7 @@ def tearDown(self) -> None: shutil.rmtree(file) else: os.remove(file) - except PermissionError as pe: + except PermissionError: # We don't necessarily care that much continue From b13aad3c7f41f1d24494db26f673ee63ea808601 Mon Sep 17 00:00:00 2001 From: Harlan Lieberman-Berg Date: Wed, 1 Nov 2023 00:46:10 -0400 Subject: [PATCH 4/5] OD-1730: Switch to using ruff for 
linting --- .github/workflows/python-app-linux.yml | 9 +++------ .github/workflows/python-app-macos-windows.yml | 9 +++------ opendoors/mysql.py | 4 ++-- requirements.txt | 1 + start.py | 2 +- 5 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/python-app-linux.yml b/.github/workflows/python-app-linux.yml index 626c7aa..6fd2487 100644 --- a/.github/workflows/python-app-linux.yml +++ b/.github/workflows/python-app-linux.yml @@ -34,14 +34,11 @@ jobs: shell: bash run: | python -m pip install --upgrade pip - pip install flake8 pytest + pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 + - name: Lint with ruff run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + ruff check . - name: Test with pytest run: | pytest diff --git a/.github/workflows/python-app-macos-windows.yml b/.github/workflows/python-app-macos-windows.yml index bcd0546..3c40777 100644 --- a/.github/workflows/python-app-macos-windows.yml +++ b/.github/workflows/python-app-macos-windows.yml @@ -27,14 +27,11 @@ jobs: shell: bash run: | python -m pip install --upgrade pip - pip install flake8 pytest + pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 + - name: Lint with ruff run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + ruff check . 
- name: Test with pytest - exclude MySQL integration tests run: | pytest --ignore "opendoors/tests/test_sql_db.py" --ignore "efiction/tests/test_metadata.py" diff --git a/opendoors/mysql.py b/opendoors/mysql.py index ed67c1c..04964f1 100644 --- a/opendoors/mysql.py +++ b/opendoors/mysql.py @@ -159,7 +159,7 @@ def dump_database(self, database: str, destination_filepath: str): f.write(f"INSERT INTO {database}.`{str(table)}` ({column_names}) VALUES \n") field_arr = [] for field in row: - if type(row[field]) == str or type(row[field]) == datetime.datetime: + if type(row[field]) == str or type(row[field]) == datetime.datetime: # noqa: E721 field_arr.append(self.conn.escape(row[field])) elif row[field] is None: field_arr.append("NULL") @@ -198,7 +198,7 @@ def __del__(self): """ try: self.conn.close() - except: + except: # noqa: E722 pass diff --git a/requirements.txt b/requirements.txt index 0349a2b..3e82fa5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ pycparser==2.20 PyMySQL==0.10.1 pyparsing==2.4.7 pytest==6.1.0 +ruff==0.1.3 shutils==0.1.0 six==1.15.0 sqlparse==0.3.1 diff --git a/start.py b/start.py index 8e96084..d1111a0 100755 --- a/start.py +++ b/start.py @@ -46,7 +46,7 @@ def save_config_and_exit(): code_name = sys.argv[1] else: code_name = None - while (code_name == None or any([x not in "qwertyuiopasdfghjklzxcvbnm" for x in code_name])): + while (code_name == None or any([x not in "qwertyuiopasdfghjklzxcvbnm" for x in code_name])): # noqa: E711 code_name = input( "Please provide a short, lowercase code name with no spaces or punctuation for the archive " "you are processing (and make a note of it as you'll need it in future!):\n>> ") From fe1a45de1ddf987580bc380990a5ee839275bd85 Mon Sep 17 00:00:00 2001 From: Harlan Lieberman-Berg Date: Wed, 1 Nov 2023 00:53:06 -0400 Subject: [PATCH 5/5] OD-1730: Switch to using ruff for formatting --- .github/workflows/python-app-linux.yml | 3 + .../workflows/python-app-macos-windows.yml | 3 + efiction/chapters.py | 199 ++-- efiction/eFiction_table_defs.py | 84 +- efiction/metadata.py | 220 +++-- efiction/original.py | 70 +- efiction/simplified.py | 69 +- efiction/tag_converter.py | 152 +-- efiction/tests/test_chapters.py | 117 ++- efiction/tests/test_metadata.py | 908 +++++++++++------- efiction/tests/test_original.py | 55 +- efiction/tests/test_simplified.py | 30 +- efiction/tests/test_utils.py | 16 +- opendoors/big_insert.py | 20 +- opendoors/config.py | 49 +- opendoors/logging.py | 9 +- opendoors/mysql.py | 50 +- opendoors/progress.py | 49 +- opendoors/sql_utils.py | 48 +- opendoors/step_base.py | 28 +- opendoors/tests/test_config.py | 14 +- opendoors/tests/test_logging.py | 26 +- opendoors/tests/test_progress.py | 64 +- opendoors/tests/test_sql_db.py | 28 +- opendoors/tests/test_sql_utils.py | 34 +- opendoors/tests/test_utils.py | 54 +- opendoors/thread_pool.py | 4 +- opendoors/utils.py | 31 +- start.py | 50 +- steps/tests/test_step_01.py | 10 +- steps/tests/test_step_02.py | 8 +- 31 files changed, 1591 insertions(+), 911 deletions(-) diff --git a/.github/workflows/python-app-linux.yml b/.github/workflows/python-app-linux.yml index 6fd2487..0f81f43 100644 --- a/.github/workflows/python-app-linux.yml +++ b/.github/workflows/python-app-linux.yml @@ -39,6 +39,9 @@ jobs: - name: Lint with ruff run: | ruff check . + - name: Check formatting with ruff + run : | + ruff format --check . 
- name: Test with pytest run: | pytest diff --git a/.github/workflows/python-app-macos-windows.yml b/.github/workflows/python-app-macos-windows.yml index 3c40777..757793e 100644 --- a/.github/workflows/python-app-macos-windows.yml +++ b/.github/workflows/python-app-macos-windows.yml @@ -32,6 +32,9 @@ jobs: - name: Lint with ruff run: | ruff check . + - name: Check formatting with ruff + run : | + ruff format --check . - name: Test with pytest - exclude MySQL integration tests run: | pytest --ignore "opendoors/tests/test_sql_db.py" --ignore "efiction/tests/test_metadata.py" diff --git a/efiction/chapters.py b/efiction/chapters.py index 8645977..79c87a4 100644 --- a/efiction/chapters.py +++ b/efiction/chapters.py @@ -7,23 +7,33 @@ from opendoors.mysql import SqlDb from opendoors.big_insert import BigInsert -from opendoors.utils import get_full_path, normalize, print_progress, make_banner, key_find, get_prefixed_path +from opendoors.utils import ( + get_full_path, + normalize, + print_progress, + make_banner, + key_find, + get_prefixed_path, +) class EFictionChapters: """ Process chapter contents and move them into the Open Doors working database. """ + def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb = None): self.sql = sql self.config = config self.logger = logger - self.working_original = self.config['Processing']['simplified_original_db'] - self.chapters_table = sql.read_table_to_dict(self.config['Processing']['simplified_original_db'], "chapters") - self.working_open_doors = self.config['Processing']['open_doors_working_db'] + self.working_original = self.config["Processing"]["simplified_original_db"] + self.chapters_table = sql.read_table_to_dict( + self.config["Processing"]["simplified_original_db"], "chapters" + ) + self.working_open_doors = self.config["Processing"]["open_doors_working_db"] def _are_chapters_in_table(self) -> bool: - return len([c for c in self.chapters_table if c['storytext']]) > 0 + return len([c for c in self.chapters_table if c["storytext"]]) > 0 @staticmethod def __file_with_path(dirpath, subdir, filename): @@ -35,9 +45,9 @@ def __file_with_path(dirpath, subdir, filename): :return: A dict containing metadata about the chapter based on its file path """ return { - 'path': os.path.join(dirpath, filename), - 'chap_id': Path(filename).stem, - 'author_id': subdir + "path": os.path.join(dirpath, filename), + "chap_id": Path(filename).stem, + "author_id": subdir, } def load_chapter_text_into_db(self, chapter_paths: List[dict]): @@ -49,33 +59,39 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]): warnings = 0 forced_continue = False self.logger.info("...loading data from chapters table...") - old_chapters, current, total = self.sql.read_table_with_total(self.working_original, "chapters") + old_chapters, current, total = self.sql.read_table_with_total( + self.working_original, "chapters" + ) self.logger.info("...removing rows from existing chapters table...") self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;") self.logger.info("...loading text from chapter files...") insert_op = BigInsert( - self.working_open_doors, - "chapters", - ["id", "position", "title", "text", "story_id", "notes"], - self.sql - ) + self.working_open_doors, + "chapters", + ["id", "position", "title", "text", "story_id", "notes"], + self.sql, + ) try: - encoding = self.config['Archive']['encoding'] + encoding = self.config["Archive"]["encoding"] except KeyError: encoding = None if encoding is None: - message_string = """ + message_string = ( + """ 
You have not specified any character encoding in the config file! If you are unsure which encoding is used in the backup -""".strip() + ( - f""", please run the mojibake tool: +""".strip() + + ( + f""", please run the mojibake tool: mojibake {self.config['Archive']['chapter_path']} - """ if shutil.which('mojibake') is not None else f""" + """ + if shutil.which("mojibake") is not None + else f""" , you can install the mojibake tool from its repository: @@ -86,49 +102,64 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]): mojibake {self.config['Archive']['chapter_path']} """.strip() -) + ) + ) print(message_string) while encoding is None: encoding_text = input("Enter a valid encoding (press enter for utf8): ") if encoding_text == "": - encoding_text = 'utf8' + encoding_text = "utf8" try: # check if encoding is valid - ''.encode(encoding_text) + "".encode(encoding_text) encoding = encoding_text except LookupError: print(f"{encoding_text} is not a valid encoding, try again") for old_chapter in old_chapters: - chapid = old_chapter['chapid'] - chapter = [chapter_path for chapter_path in chapter_paths if chapter_path['chap_id'] == str(chapid)] + chapid = old_chapter["chapid"] + chapter = [ + chapter_path + for chapter_path in chapter_paths + if chapter_path["chap_id"] == str(chapid) + ] if chapter: - file = chapter[0]['path'] - with open(file, 'rb') as raw_chapter: + file = chapter[0]["path"] + with open(file, "rb") as raw_chapter: raw = raw_chapter.read() while isinstance(raw, bytes): try: raw = raw.decode(encoding=encoding) except UnicodeDecodeError as e: error = f"Failed to decode {file}\n" - line_num = raw[:e.start].decode(encoding).count("\n") + line_num = raw[: e.start].decode(encoding).count("\n") error += f"At line {line_num}:\t{str(e)}\n" - error += "--\t" + str(raw[max(e.start - 40, 0):e.end + 30]) + "\n" + error += ( + "--\t" + + str(raw[max(e.start - 40, 0) : e.end + 30]) + + "\n" + ) # print `^` under the offending byte - error += "\t" + \ - " " * (len(str(raw[max(e.start - 40, 0):e.start])) - 1) + \ - "^" * (len(str(raw[e.start:e.end])) - 3) + "\n" + error += ( + "\t" + + " " + * (len(str(raw[max(e.start - 40, 0) : e.start])) - 1) + + "^" * (len(str(raw[e.start : e.end])) - 3) + + "\n" + ) error += "Will be converted to:\n" # remove the offending bytes (usually one) - raw = raw[:e.start] + raw[e.end:] - error += "++\t " + raw[ - max(e.start - 40, 0): - e.end + 30 - ].decode(encoding, errors='ignore') \ - .replace("\n", "\\n") \ - .replace("\r", "\\r") + "\n" + raw = raw[: e.start] + raw[e.end :] + error += ( + "++\t " + + raw[max(e.start - 40, 0) : e.end + 30] + .decode(encoding, errors="ignore") + .replace("\n", "\\n") + .replace("\r", "\\r") + + "\n" + ) self.logger.warning(error) warnings += 1 - if warnings > len(old_chapters) * .3 and not forced_continue: + if warnings > len(old_chapters) * 0.3 and not forced_continue: msg = f""" A total of {warnings} automatic modifications have been performed so far! @@ -147,28 +178,35 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]): raise Exception("Process aborted, too many errors!") text = normalize(raw) - if key_find('endnotes', old_chapter): + if key_find("endnotes", old_chapter): text = text + f"\n\n\n
<hr>\n{old_chapter['endnotes']}"
 
                 insert_op.addRow(
                     chapid,
-                    old_chapter['inorder'],
-                    old_chapter['title'],
+                    old_chapter["inorder"],
+                    old_chapter["title"],
                     text,
-                    old_chapter['sid'],
-                    old_chapter['notes']
+                    old_chapter["sid"],
+                    old_chapter["notes"],
                 )
                 current = print_progress(current, total, "chapters converted")
 
         # If there were any errors, display a warning for the user to check the affected chapters
         if warnings >= 1:
-            self.logger.warning("If the character deletion is unacceptable please quit this processor and use the mojibake tool,"
-                                " then restart the processor from step 04")
+            self.logger.warning(
+                "If the character deletion is unacceptable please quit this processor and use the mojibake tool,"
+                " then restart the processor from step 04"
+            )
             self.logger.error(
-                make_banner('-',
-                            f"There were {warnings} warnings; check the affected chapters listed above to make sure curly quotes "
-                            "and accented characters are correctly displayed."))
+                make_banner(
+                    "-",
+                    f"There were {warnings} warnings; check the affected chapters listed above to make sure curly quotes "
+                    "and accented characters are correctly displayed.",
+                )
+            )
 
         insert_op.send()
-        return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM chapters;")
+        return self.sql.execute_and_fetchall(
+            self.working_open_doors, "SELECT * FROM chapters;"
+        )
 
     def list_chapter_files(self):
         """
@@ -177,10 +215,21 @@ def list_chapter_files(self):
         """
         self.logger.info("Loading chapters from the filesystem...")
         chapter_paths = []
-        for dirpath, dirnames, filenames in os.walk(get_full_path(self.config['Archive']['chapter_path'])):
+        for dirpath, dirnames, filenames in os.walk(
+            get_full_path(self.config["Archive"]["chapter_path"])
+        ):
             subdir = dirpath.split(os.path.sep)[-1]
-            if subdir and subdir != self.config['Archive']['chapter_path'].split(os.path.sep)[-1]:
-                chapter_paths.extend([self.__file_with_path(dirpath, subdir, filename) for filename in filenames])
+            if (
+                subdir
+                and subdir
+                != self.config["Archive"]["chapter_path"].split(os.path.sep)[-1]
+            ):
+                chapter_paths.extend(
+                    [
+                        self.__file_with_path(dirpath, subdir, filename)
+                        for filename in filenames
+                    ]
+                )
         return chapter_paths
 
     def load_og_chapters_into_db(self):
@@ -189,37 +238,41 @@ def load_og_chapters_into_db(self):
         :return:
         """
         self.logger.info("...loading data from chapters table...")
-        old_chapters, current, total = self.sql.read_table_with_total(self.working_original, "chapters")
+        old_chapters, current, total = self.sql.read_table_with_total(
+            self.working_original, "chapters"
+        )
 
         self.logger.info("...removing rows from existing chapters table...")
         self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;")
 
         self.logger.info("...loading chapters from original chapters table...")
         insert_op = BigInsert(
-            self.working_open_doors,
-            "chapters",
-            ["id", "position", "title", "text", "story_id", "notes"],
-            self.sql
-        )
+            self.working_open_doors,
+            "chapters",
+            ["id", "position", "title", "text", "story_id", "notes"],
+            self.sql,
+        )
         for old_chapter in old_chapters:
-            text = normalize(old_chapter['storytext'])
-            if key_find('endnotes', old_chapter):
+            text = normalize(old_chapter["storytext"])
+            if key_find("endnotes", old_chapter):
                 text = text + f"\n\n\n<hr>
\n{old_chapter['endnotes']}" insert_op.addRow( - old_chapter['chapid'], - old_chapter['inorder'], - old_chapter['title'], + old_chapter["chapid"], + old_chapter["inorder"], + old_chapter["title"], text, - old_chapter['sid'], - old_chapter['notes'] + old_chapter["sid"], + old_chapter["notes"], ) current = print_progress(current, total, "chapters converted") insert_op.send() - return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM chapters;") + return self.sql.execute_and_fetchall( + self.working_open_doors, "SELECT * FROM chapters;" + ) def load_chapters(self, step_path: str): """ @@ -227,17 +280,21 @@ def load_chapters(self, step_path: str): :return: """ if self._are_chapters_in_table(): - self.logger.info("Chapters are already present in the original database, converting now") + self.logger.info( + "Chapters are already present in the original database, converting now" + ) self.load_og_chapters_into_db() else: - if not self.config.has_option('Archive', 'chapter_path'): + if not self.config.has_option("Archive", "chapter_path"): chapter_path = input("Full path to chapter files\n>> ") - self.config['Archive']['chapter_path'] = os.path.normpath(chapter_path) + self.config["Archive"]["chapter_path"] = os.path.normpath(chapter_path) chapter_paths = self.list_chapter_files() self.load_chapter_text_into_db(chapter_paths) - database_dump = get_prefixed_path("04", step_path, f"{self.working_open_doors}.sql") + database_dump = get_prefixed_path( + "04", step_path, f"{self.working_open_doors}.sql" + ) self.logger.info(f"Exporting converted tables to {database_dump}...") self.sql.dump_database(self.working_open_doors, database_dump) return True diff --git a/efiction/eFiction_table_defs.py b/efiction/eFiction_table_defs.py index 67dc1c9..7b9b328 100644 --- a/efiction/eFiction_table_defs.py +++ b/efiction/eFiction_table_defs.py @@ -5,8 +5,7 @@ import re table_definitions = { - 'authorfields': - """CREATE TABLE `{0}` ( + "authorfields": """CREATE TABLE `{0}` ( `field_id` int(11) NOT NULL AUTO_INCREMENT, `field_type` tinyint(4) NOT NULL DEFAULT '0', `field_name` varchar(30) NOT NULL DEFAULT ' ', @@ -17,16 +16,14 @@ `field_on` tinyint(1) NOT NULL DEFAULT '0', PRIMARY KEY (`field_id`) ) ENGINE = MyISAM;""", - 'authorinfo': - """CREATE TABLE `{0}` ( + "authorinfo": """CREATE TABLE `{0}` ( `uid` int(11) NOT NULL DEFAULT '0', `field` int(11) NOT NULL DEFAULT '0', `info` varchar(255) NOT NULL DEFAULT ' ', PRIMARY KEY (`uid`, `field`), KEY `uid` (`uid`) ) ENGINE = MyISAM;""", - 'authorprefs': - """CREATE TABLE `{0}` ( + "authorprefs": """CREATE TABLE `{0}` ( `uid` int(11) NOT NULL DEFAULT '0', `newreviews` tinyint(1) NOT NULL DEFAULT '0', `newrespond` tinyint(1) NOT NULL DEFAULT '0', @@ -43,8 +40,7 @@ `stories` int(11) NOT NULL DEFAULT '0', PRIMARY KEY (`uid`) ) ENGINE = MyISAM""", - 'authors': - """CREATE TABLE `{0}` ( + "authors": """CREATE TABLE `{0}` ( `uid` int(11) NOT NULL AUTO_INCREMENT, `penname` varchar(200) NOT NULL DEFAULT '', `realname` varchar(200) NOT NULL DEFAULT '', @@ -57,8 +53,7 @@ `password` varchar(40) NOT NULL DEFAULT '0', PRIMARY KEY (`uid`) ) ENGINE=MyISAM;""", - 'blocks': - """CREATE TABLE `{0}` ( + "blocks": """CREATE TABLE `{0}` ( `block_id` int(11) NOT NULL AUTO_INCREMENT, `block_name` varchar(30) NOT NULL DEFAULT '', `block_title` varchar(150) NOT NULL DEFAULT '', @@ -68,8 +63,7 @@ PRIMARY KEY (`block_id`), KEY `block_name` (`block_name`) ) ENGINE = MyISAM;""", - 'categories': - """CREATE TABLE `{0}` ( + "categories": """CREATE TABLE `{0}` ( `catid` int(11) NOT 
NULL AUTO_INCREMENT, `parentcatid` int(11) NOT NULL DEFAULT -1, `category` varchar(60) NOT NULL DEFAULT '', @@ -81,8 +75,7 @@ `numitems` int(11) NOT NULL DEFAULT 0, PRIMARY KEY (`catid`) ) ENGINE=MyISAM;""", - 'challenges': - """CREATE TABLE `{0}` ( + "challenges": """CREATE TABLE `{0}` ( `chalid` int(11) NOT NULL AUTO_INCREMENT, `challenger` varchar(200) NOT NULL DEFAULT '', `uid` int(11) NOT NULL DEFAULT 0, @@ -93,8 +86,7 @@ `responses` int(11) NOT NULL DEFAULT 0, PRIMARY KEY (`chalid`) ) ENGINE=MyISAM;""", - 'chapters': - """CREATE TABLE `{0}` ( + "chapters": """CREATE TABLE `{0}` ( `chapid` int(11) NOT NULL AUTO_INCREMENT, `title` varchar(250) NOT NULL DEFAULT '', `inorder` int(11) NOT NULL DEFAULT 0, @@ -110,8 +102,7 @@ `count` int(11) NOT NULL DEFAULT 0, PRIMARY KEY (`chapid`) ) ENGINE=MyISAM;""", - 'characters': - """CREATE TABLE `{0}` ( + "characters": """CREATE TABLE `{0}` ( `charid` int(11) NOT NULL AUTO_INCREMENT, `catid` int(11) NOT NULL DEFAULT 0, `charname` varchar(60) NOT NULL DEFAULT '', @@ -119,28 +110,24 @@ `image` varchar(200) NOT NULL DEFAULT '', PRIMARY KEY (`charid`) ) ENGINE=MyISAM;""", - 'classes': - """CREATE TABLE `{0}` ( + "classes": """CREATE TABLE `{0}` ( `class_id` int(11) NOT NULL AUTO_INCREMENT, `class_type` int(11) NOT NULL DEFAULT 0, `class_name` varchar(100) NOT NULL DEFAULT '', PRIMARY KEY (`class_id`) ) ENGINE=MyISAM;""", - 'classtypes': - """CREATE TABLE `{0}` ( + "classtypes": """CREATE TABLE `{0}` ( `classtype_id` int(11) NOT NULL AUTO_INCREMENT, `classtype_name` varchar(50) NOT NULL DEFAULT '', `classtype_title` varchar(50) NOT NULL DEFAULT '', PRIMARY KEY (`classtype_id`) ) ENGINE=MyISAM;""", - 'coauthors': - """CREATE TABLE `{0}` ( + "coauthors": """CREATE TABLE `{0}` ( `sid` int(11) NOT NULL DEFAULT 0, `uid` int(11) NOT NULL DEFAULT 0, PRIMARY KEY (`sid`,`uid`) ) ENGINE=MyISAM;""", - 'codeblocks': - """CREATE TABLE `{0}` ( + "codeblocks": """CREATE TABLE `{0}` ( `code_id` int(11) NOT NULL AUTO_INCREMENT, `code_text` text NOT NULL, `code_type` varchar(20) DEFAULT NULL, @@ -149,8 +136,7 @@ KEY `code_type` (`code_type`) ) ENGINE = MyISAM; """, - 'comments': - """CREATE TABLE `{0}` ( + "comments": """CREATE TABLE `{0}` ( `cid` int(11) NOT NULL AUTO_INCREMENT, `nid` int(11) NOT NULL DEFAULT '0', `uid` int(11) NOT NULL DEFAULT '0', @@ -159,8 +145,7 @@ PRIMARY KEY (`cid`), KEY `commentlist` (`nid`, `time`) ) ENGINE = MyISAM;""", - 'favorites': - """CREATE TABLE `{0}` ( + "favorites": """CREATE TABLE `{0}` ( `uid` int(11) NOT NULL DEFAULT '0', `item` int(11) NOT NULL DEFAULT '0', `type` char(2) NOT NULL DEFAULT '', @@ -168,8 +153,7 @@ UNIQUE KEY `byitem` (`item`, `type`, `uid`), UNIQUE KEY `byuid` (`uid`, `type`, `item`) ) ENGINE = MyISAM;""", - 'inseries': - """CREATE TABLE `{0}` ( + "inseries": """CREATE TABLE `{0}` ( `seriesid` int(11) NOT NULL DEFAULT 0, `sid` int(11) NOT NULL DEFAULT 0, `subseriesid` int(11) NOT NULL DEFAULT 0, @@ -178,8 +162,7 @@ PRIMARY KEY (`sid`,`seriesid`,`subseriesid`), KEY `seriesid` (`seriesid`,`inorder`) ) ENGINE=MyISAM;""", - 'log': - """CREATE TABLE `{0}` ( + "log": """CREATE TABLE `{0}` ( `log_id` int(11) NOT NULL AUTO_INCREMENT, `log_action` varchar(255) DEFAULT NULL, `log_uid` int(11) NOT NULL, @@ -188,8 +171,7 @@ `log_type` varchar(2) NOT NULL, PRIMARY KEY (`log_id`) ) ENGINE = MyISAM;""", - 'messages': - """CREATE TABLE `{0}` ( + "messages": """CREATE TABLE `{0}` ( `message_id` int(11) NOT NULL AUTO_INCREMENT, `message_name` varchar(50) NOT NULL DEFAULT '', `message_title` varchar(200) NOT NULL DEFAULT '', @@ -197,15 
+179,14 @@ PRIMARY KEY (`message_id`), KEY `message_name` (`message_name`) ) ENGINE = MyISAM;""", - 'modules': - """CREATE TABLE `{0}` ( + "modules": """CREATE TABLE `{0}` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(100) CHARACTER SET latin1 COLLATE latin1_general_ci NOT NULL DEFAULT 'Test Module', `version` varchar(10) CHARACTER SET latin1 COLLATE latin1_general_ci NOT NULL DEFAULT '1.0', PRIMARY KEY (`id`), KEY `name_version` (`name`, `version`) ) ENGINE = MyISAM;""", - 'news': """CREATE TABLE `{0}` ( + "news": """CREATE TABLE `{0}` ( `nid` int(11) NOT NULL AUTO_INCREMENT, `author` varchar(60) NOT NULL DEFAULT '', `title` varchar(255) NOT NULL DEFAULT '', @@ -214,8 +195,7 @@ `comments` int(11) NOT NULL DEFAULT '0', PRIMARY KEY (`nid`) ) ENGINE = MyISAM;""", - 'pagelinks': - """CREATE TABLE `{0}` ( + "pagelinks": """CREATE TABLE `{0}` ( `link_id` int(11) NOT NULL AUTO_INCREMENT, `link_name` varchar(50) NOT NULL DEFAULT '', `link_text` varchar(100) NOT NULL DEFAULT '', @@ -226,8 +206,7 @@ PRIMARY KEY (`link_id`), KEY `link_name` (`link_name`) ) ENGINE = MyISAM;""", - 'panels': - """CREATE TABLE `{0}` ( + "panels": """CREATE TABLE `{0}` ( `panel_id` int(11) NOT NULL AUTO_INCREMENT, `panel_name` varchar(50) NOT NULL DEFAULT 'unknown', `panel_title` varchar(100) NOT NULL DEFAULT 'Unnamed Panel', @@ -239,15 +218,14 @@ PRIMARY KEY (`panel_id`), KEY `panel_type` (`panel_type`, `panel_name`) ) ENGINE = MyISAM;""", - 'ratings': - """CREATE TABLE `{0}` ( + "ratings": """CREATE TABLE `{0}` ( `rid` int(11) NOT NULL AUTO_INCREMENT, `rating` varchar(60) NOT NULL DEFAULT '', `ratingwarning` char(1) NOT NULL DEFAULT '0', `warningtext` text NOT NULL, PRIMARY KEY (`rid`) ) ENGINE=MyISAM;""", - 'reviews': """CREATE TABLE `{0}` ( + "reviews": """CREATE TABLE `{0}` ( `reviewid` int(11) NOT NULL AUTO_INCREMENT, `item` int(11) NOT NULL DEFAULT '0', `chapid` int(11) NOT NULL DEFAULT '0', @@ -266,7 +244,7 @@ KEY `bychapter` (`chapid`, `rating`), KEY `byuid` (`uid`, `item`, `type`) ) ENGINE = MyISAM;""", - 'series': """CREATE TABLE `{0}` ( + "series": """CREATE TABLE `{0}` ( `seriesid` int(11) NOT NULL AUTO_INCREMENT, `title` varchar(200) NOT NULL DEFAULT '', `summary` text DEFAULT NULL, @@ -283,7 +261,7 @@ `numstories` int(11) NOT NULL DEFAULT 0, PRIMARY KEY (`seriesid`) ) ENGINE=MyISAM;""", - 'settings': """CREATE TABLE `{0}` ( + "settings": """CREATE TABLE `{0}` ( `sitekey` varchar(50) NOT NULL DEFAULT '1', `sitename` varchar(200) NOT NULL DEFAULT 'Your Site', `slogan` varchar(200) NOT NULL DEFAULT 'It''s a cool site!', @@ -341,8 +319,7 @@ `smtp_password` varchar(50) DEFAULT NULL, PRIMARY KEY (`sitekey`) ) ENGINE = MyISAM;""", - 'stats': - """CREATE TABLE `{0}` ( + "stats": """CREATE TABLE `{0}` ( `sitekey` varchar(50) NOT NULL DEFAULT '0', `stories` int(11) NOT NULL DEFAULT '0', `chapters` int(11) NOT NULL DEFAULT '0', @@ -354,8 +331,7 @@ `reviewers` int(11) NOT NULL DEFAULT '0', `newestmember` int(11) NOT NULL DEFAULT '0' ) ENGINE = MyISAM;""", - 'stories': - """CREATE TABLE `{0}` ( + "stories": """CREATE TABLE `{0}` ( `sid` int(11) NOT NULL AUTO_INCREMENT, `title` varchar(200) NOT NULL DEFAULT '', `summary` text, @@ -379,7 +355,7 @@ `challenges` varchar(200) NOT NULL DEFAULT '0', PRIMARY KEY (`sid`) ) ENGINE=MyISAM; - """ + """, } @@ -389,7 +365,7 @@ def create_def(table_name): :param table_name: the original table name (eg: fanfiction_stories) :return: the DROP and CREATE statements for this table """ - key = re.sub(r'\S+_', '', table_name) + key = re.sub(r"\S+_", "", table_name) if key: 
drop_table_def = f"\nDROP TABLE IF EXISTS `{table_name}`;" create_table_def = table_definitions[key].format(table_name) diff --git a/efiction/metadata.py b/efiction/metadata.py index ae277d3..85d1221 100644 --- a/efiction/metadata.py +++ b/efiction/metadata.py @@ -6,8 +6,18 @@ from efiction.tag_converter import TagConverter from opendoors.mysql import SqlDb -from opendoors.sql_utils import parse_remove_comments, write_statements_to_file, add_create_database -from opendoors.utils import print_progress, get_full_path, normalize, key_find, get_prefixed_path +from opendoors.sql_utils import ( + parse_remove_comments, + write_statements_to_file, + add_create_database, +) +from opendoors.utils import ( + print_progress, + get_full_path, + normalize, + key_find, + get_prefixed_path, +) from opendoors.thread_pool import ThreadedPool from opendoors.big_insert import BigInsert @@ -17,23 +27,28 @@ class EFictionMetadata: Create and populate Open Doors tables and extract tags to a separate table for later export to Tag Wrangling. """ - def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb, step_path: str): + def __init__( + self, config: ConfigParser, logger: Logger, sql: SqlDb, step_path: str + ): self.config = config self.sql = sql self.logger = logger - self.working_original = self.config['Processing']['simplified_original_db'] if self.config.has_option( - 'Processing', 'simplified_original_db') else None + self.working_original = ( + self.config["Processing"]["simplified_original_db"] + if self.config.has_option("Processing", "simplified_original_db") + else None + ) self.create_open_doors_db(step_path) - self.working_open_doors = self.config['Processing']['open_doors_working_db'] + self.working_open_doors = self.config["Processing"]["open_doors_working_db"] self.tag_converter = TagConverter(config, logger, sql) self.authors = [] self.tag_tables = { - 'rating': None, - 'categories': None, - 'warnings': None, - 'classes': None, - 'genres': None, - 'characters': None + "rating": None, + "categories": None, + "warnings": None, + "classes": None, + "genres": None, + "characters": None, } self.tag_tables_is_nonstandard = {} @@ -43,16 +58,22 @@ def create_open_doors_db(self, step_path): :param step_path: Path for the current step, where the database backup will be saved. 
:return: True if successful """ - od_table_sql_file = get_full_path('opendoors/open-doors-tables-working.sql') - self.config['Processing']['open_doors_working_db'] = \ - f"{self.config['Archive']['code_name']}_working_open_doors" - self.config['Processing']['open_doors_working_db_file'] = \ - get_prefixed_path("03", step_path, f"{self.config['Processing']['open_doors_working_db']}.sql") + od_table_sql_file = get_full_path("opendoors/open-doors-tables-working.sql") + self.config["Processing"][ + "open_doors_working_db" + ] = f"{self.config['Archive']['code_name']}_working_open_doors" + self.config["Processing"]["open_doors_working_db_file"] = get_prefixed_path( + "03", step_path, f"{self.config['Processing']['open_doors_working_db']}.sql" + ) with open(od_table_sql_file, "r") as f: od_table_defs = parse_remove_comments(f.read()) - statements = add_create_database(self.config['Processing']['open_doors_working_db'], od_table_defs) - od_tables = write_statements_to_file(self.config['Processing']['open_doors_working_db_file'], statements) + statements = add_create_database( + self.config["Processing"]["open_doors_working_db"], od_table_defs + ) + od_tables = write_statements_to_file( + self.config["Processing"]["open_doors_working_db_file"], statements + ) self.sql.load_sql_file_into_db(od_tables) return True @@ -70,27 +91,27 @@ def generate_email(name: str, email: str, archive_long_name: str): return email.strip() else: user = name.title() + archive_long_name.title() - return re.sub(r'\W+', '', unidecode(user)) + 'Archive@ao3.org' + return re.sub(r"\W+", "", unidecode(user)) + "Archive@ao3.org" def _convert_authors(self, old_authors=None): self.logger.info("Converting authors...") current = 0 total = len(old_authors) insert_op = BigInsert( - self.working_open_doors, - "authors", - ["id", "name", "email"], - self.sql - ) + self.working_open_doors, "authors", ["id", "name", "email"], self.sql + ) for old_author in old_authors: # convert all old_authors into rows in the insert operation new_author = { - 'id': old_author['uid'], - 'name': old_author['penname'], - 'email': self.generate_email(old_author['penname'], old_author['email'], - self.config['Archive']['archive_name']) + "id": old_author["uid"], + "name": old_author["penname"], + "email": self.generate_email( + old_author["penname"], + old_author["email"], + self.config["Archive"]["archive_name"], + ), } - insert_op.addRow(new_author['id'], new_author["name"], new_author["email"]) + insert_op.addRow(new_author["id"], new_author["name"], new_author["email"]) current = print_progress(current, total, "authors converted") insert_op.send() return self.sql.read_table_to_dict(self.working_open_doors, "authors") @@ -101,51 +122,71 @@ def _convert_characters(self, old_characters): self.working_open_doors, "tags", ["original_tagid", "original_tag", "original_type", "original_parent"], - self.sql + self.sql, ) - if self.tag_tables['categories'] is None: - self.tag_tables['categories'] = self.tag_converter.convert_categories() + if self.tag_tables["categories"] is None: + self.tag_tables["categories"] = self.tag_converter.convert_categories() for old_character in old_characters: - parent = [ct['original_tag'] for ct in self.tag_tables['categories'] if ct['original_tagid'] == old_character['catid']] + parent = [ + ct["original_tag"] + for ct in self.tag_tables["categories"] + if ct["original_tagid"] == old_character["catid"] + ] new_tag = { - 'id': old_character['charid'], - 'name': old_character['charname'], - 'parent': ", ".join(parent) if parent != 
[] else "" + "id": old_character["charid"], + "name": old_character["charname"], + "parent": ", ".join(parent) if parent != [] else "", } - insert_op.addRow(new_tag["id"], new_tag["name"], "character", new_tag["parent"]) + insert_op.addRow( + new_tag["id"], new_tag["name"], "character", new_tag["parent"] + ) current = print_progress(current, total, "characters converted") insert_op.send() - return self.sql.execute_and_fetchall(self.working_open_doors, - "SELECT * FROM tags WHERE `original_type` = 'character'") + return self.sql.execute_and_fetchall( + self.working_open_doors, + "SELECT * FROM tags WHERE `original_type` = 'character'", + ) def _convert_story_tag_table(self, table_name, old_tags): # Standard for tag table to be organized by id. - original_tagid = 'original_tagid' + original_tagid = "original_tagid" if self.tag_table_is_nonstandard[table_name]: # Tag table identified by name rather than id. - original_tagid = 'original_tag' - return [c['id'] for c in self.tag_tables[table_name] if str(c[original_tagid]) in old_tags[table_name]] + original_tagid = "original_tag" + return [ + c["id"] + for c in self.tag_tables[table_name] + if str(c[original_tagid]) in old_tags[table_name] + ] def _convert_story_tags(self, old_story): old_tags = { - 'rating': [t.strip() for t in key_find('rid', old_story, '').split(',')], - 'categories': [t.strip() for t in key_find('catid', old_story, '').split(',')], - 'warnings': [t.strip() for t in key_find('wid', old_story, '').split(',')], - 'classes': [t.strip() for t in key_find('classes', old_story, '').split(',')], - 'genres': [t.strip() for t in key_find('gid', old_story, '').split(',')], - 'characters': [t.strip() for t in key_find('charid', old_story, '').split(',')], + "rating": [t.strip() for t in key_find("rid", old_story, "").split(",")], + "categories": [ + t.strip() for t in key_find("catid", old_story, "").split(",") + ], + "warnings": [t.strip() for t in key_find("wid", old_story, "").split(",")], + "classes": [ + t.strip() for t in key_find("classes", old_story, "").split(",") + ], + "genres": [t.strip() for t in key_find("gid", old_story, "").split(",")], + "characters": [ + t.strip() for t in key_find("charid", old_story, "").split(",") + ], } new_tags = {} for tag_table_name in self.tag_tables.keys(): - new_tags[tag_table_name] = self._convert_story_tag_table(tag_table_name, old_tags) + new_tags[tag_table_name] = self._convert_story_tag_table( + tag_table_name, old_tags + ) return { - 'rating': new_tags['rating'], - 'categories': new_tags['categories'] + new_tags['warnings'], - 'classes': new_tags['classes'] + new_tags['genres'], - 'characters': new_tags['characters'] + "rating": new_tags["rating"], + "categories": new_tags["categories"] + new_tags["warnings"], + "classes": new_tags["classes"] + new_tags["genres"], + "characters": new_tags["characters"], } def _convert_tags_join(self, new_story, tags, sql=None): @@ -181,7 +222,7 @@ def fetch_coauthors(self, new_story, sql=None): # We only try to operate on this result if it is not None if authors: for author in authors: - coauthors.append(author['uid']) + coauthors.append(author["uid"]) return coauthors def convert_stories(self, language_code): @@ -192,22 +233,25 @@ def convert_stories(self, language_code): :return: The Open Doors stories table as a dict. 
""" self.logger.info("Converting stories...") - old_stories, current, total = self.sql.read_table_with_total(self.working_original, "stories") + old_stories, current, total = self.sql.read_table_with_total( + self.working_original, "stories" + ) + def story_processor(old_story): """ - Lambda-esque function that clones connection to database and + Lambda-esque function that clones connection to database and fully converts the given story :param old_story: Original story read from the database """ sql = self.sql.get_another_connection() new_story = { - 'id': old_story['sid'], - 'title': key_find('title', old_story, '').strip(), - 'summary': normalize(old_story['summary']), - 'notes': key_find('storynotes', old_story, '').strip(), - 'date': str(old_story['date']), - 'updated': str(old_story['updated']), - 'language_code': language_code + "id": old_story["sid"], + "title": key_find("title", old_story, "").strip(), + "summary": normalize(old_story["summary"]), + "notes": key_find("storynotes", old_story, "").strip(), + "date": str(old_story["date"]), + "updated": str(old_story["updated"]), + "language_code": language_code, } self.logger.debug(f"Converting story metadata for '{new_story['title']}'") @@ -224,41 +268,49 @@ def story_processor(old_story): self._convert_tags_join(new_story, tags, sql) self.logger.debug(" authors...") - self._convert_author_join(new_story, old_story['uid'], sql) + self._convert_author_join(new_story, old_story["uid"], sql) # Find if there are any coauthors for the work coauthors = self.fetch_coauthors(new_story, sql) for coauthor in coauthors: self._convert_author_join(new_story, coauthor, sql) - + # if you give to little threads, it will be too slow, # if you give too much - it will fail while attempting to connect to db # (mysql does not like 200 connections); 20 seems to be a nice balance pool = ThreadedPool(20) pool.map(story_processor, [[x] for x in old_stories]) - return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM stories") + return self.sql.execute_and_fetchall( + self.working_open_doors, "SELECT * FROM stories" + ) def convert_all_tags(self): """ Extract all tags by category. 
""" - self.tag_tables['rating'] = self.tag_converter.convert_ratings() - self.tag_tables['categories'] = self.tag_converter.convert_categories() - self.tag_tables['warnings'] = self.tag_converter.convert_warnings() - self.tag_tables['classes'] = self.tag_converter.convert_classes() - self.tag_tables['genres'] = self.tag_converter.convert_genres() - - old_characters = self.sql.read_table_to_dict(self.working_original, "characters") - self.tag_tables['characters'] = self._convert_characters(old_characters) + self.tag_tables["rating"] = self.tag_converter.convert_ratings() + self.tag_tables["categories"] = self.tag_converter.convert_categories() + self.tag_tables["warnings"] = self.tag_converter.convert_warnings() + self.tag_tables["classes"] = self.tag_converter.convert_classes() + self.tag_tables["genres"] = self.tag_converter.convert_genres() + + old_characters = self.sql.read_table_to_dict( + self.working_original, "characters" + ) + self.tag_tables["characters"] = self._convert_characters(old_characters) - self.tag_table_is_nonstandard = self.tag_converter.check_for_nonstandard_tag_tables() + self.tag_table_is_nonstandard = ( + self.tag_converter.check_for_nonstandard_tag_tables() + ) def convert_original_to_open_doors(self, step_path: str): """ Convert eFiction tables to Open Doors tables :return: True if the process was successful """ - self.logger.info("Converting metadata tables from eFiction to Open Doors structure...") + self.logger.info( + "Converting metadata tables from eFiction to Open Doors structure..." + ) self.convert_all_tags() @@ -266,15 +318,19 @@ def convert_original_to_open_doors(self, step_path: str): self.authors = self._convert_authors(old_authors) # Prompt for original db file if we don't already have it in the config - if not self.config.has_option('Archive', 'language_code'): - language = input("Two-letter Language code of the stories in this archive (default: en - press enter):\n>> ") - self.config['Archive']['language_code'] = language + if not self.config.has_option("Archive", "language_code"): + language = input( + "Two-letter Language code of the stories in this archive (default: en - press enter):\n>> " + ) + self.config["Archive"]["language_code"] = language else: - language = self.config['Archive']['language_code'] + language = self.config["Archive"]["language_code"] self.convert_stories(language) - database_dump = get_prefixed_path("03", step_path, f"{self.working_open_doors}_without_chapters.sql") + database_dump = get_prefixed_path( + "03", step_path, f"{self.working_open_doors}_without_chapters.sql" + ) self.logger.info(f"Exporting converted tables to {database_dump}...") self.sql.dump_database(self.working_open_doors, database_dump) return True diff --git a/efiction/original.py b/efiction/original.py index d2fff47..8ae56bc 100644 --- a/efiction/original.py +++ b/efiction/original.py @@ -6,8 +6,18 @@ from efiction.eFiction_table_defs import create_def from opendoors.mysql import SqlDb -from opendoors.sql_utils import write_statements_to_file, parse_remove_comments, group_by_table, add_create_database -from opendoors.utils import copy_to_dir, check_if_file_exists, get_full_path, get_prefixed_path +from opendoors.sql_utils import ( + write_statements_to_file, + parse_remove_comments, + group_by_table, + add_create_database, +) +from opendoors.utils import ( + copy_to_dir, + check_if_file_exists, + get_full_path, + get_prefixed_path, +) class EFictionOriginal: @@ -20,7 +30,7 @@ def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb): 
self.sql = sql self.config = config self.logger = logger - self.code_name = config['Archive']['code_name'] + self.code_name = config["Archive"]["code_name"] self.edited_db_name = f"{self.code_name}_efiction_original" self.edited_file_name = f"{self.code_name}_efiction_original_edited.sql" @@ -30,7 +40,9 @@ def _contains_table_defs(grouped_statements): Does the provided list of statements contain create table statements? :return: True or False """ - statements = [item for sublist in grouped_statements.values() for item in sublist] + statements = [ + item for sublist in grouped_statements.values() for item in sublist + ] return any(str.lower(elem).startswith("create table") for elem in statements) def __backup_original(self): @@ -39,21 +51,27 @@ def __backup_original(self): :return: the path to the backup file """ # Prompt for original db file if we don't already have it in the config - if not (self.config.has_option('Archive', 'original_db_file_path')) or \ - self.config['Archive']['original_db_file_path'] == "": - path_to_original_db = input("Full path to the original database file " - "(this will be copied into the working path and loaded into MySQL):\n>> ") - self.config['Archive']['original_db_file_path'] = path_to_original_db + if ( + not (self.config.has_option("Archive", "original_db_file_path")) + or self.config["Archive"]["original_db_file_path"] == "" + ): + path_to_original_db = input( + "Full path to the original database file " + "(this will be copied into the working path and loaded into MySQL):\n>> " + ) + self.config["Archive"]["original_db_file_path"] = path_to_original_db # Return the existing file or create a backup of the original db dump - if check_if_file_exists(self.config, 'Processing', 'backup_file'): - backup_file = self.config['Processing']['backup_file'] + if check_if_file_exists(self.config, "Processing", "backup_file"): + backup_file = self.config["Processing"]["backup_file"] self.logger.info("Using backup file {}".format(backup_file)) else: - backup_file = copy_to_dir(old_file_path=self.config['Archive']['original_db_file_path'], - new_file_dir=self.config['Processing']['working_dir'], - new_file_name=f"{self.code_name}_original_db_backup.sql") - self.config['Processing']['backup_file'] = backup_file + backup_file = copy_to_dir( + old_file_path=self.config["Archive"]["original_db_file_path"], + new_file_dir=self.config["Processing"]["working_dir"], + new_file_name=f"{self.code_name}_original_db_backup.sql", + ) + self.config["Processing"]["backup_file"] = backup_file self.logger.info(f"Created backup of original file at {backup_file}") return backup_file @@ -67,7 +85,7 @@ def _add_table_definitions(self, statements: list): grouped_statements = group_by_table(statements) if not self._contains_table_defs(grouped_statements): new_grouped_statements = {} - for (table_name, statements) in grouped_statements.items(): + for table_name, statements in grouped_statements.items(): new_grouped_statements[table_name] = create_def(table_name) + statements groups = new_grouped_statements else: @@ -79,10 +97,16 @@ def __add_definitions(self): Remove comments and if needed, add table definitions to the original eFiction database :param step_path: the destination path for the file backup """ - self.logger.info("\nAdding table definitions and removing comments from original eFiction database") + self.logger.info( + "\nAdding table definitions and removing comments from original eFiction database" + ) self.logger.info("...adding table definitions and tidying original db dump...") 
- with open(get_full_path(self.config['Processing']['backup_file']), "r", encoding="utf-8") as f: + with open( + get_full_path(self.config["Processing"]["backup_file"]), + "r", + encoding="utf-8", + ) as f: original_db_sql = f.read() clean_statements = parse_remove_comments(original_db_sql) statements_with_defs = self._add_table_definitions(clean_statements) @@ -97,10 +121,14 @@ def __load_into_database(self, step_path, statements): """ self.logger.info("...writing edited SQL statements to a backup file...") edited_file_path = get_prefixed_path("01", step_path, self.edited_file_name) - self.config['Processing']['original_edited_file'] = edited_file_path - edited_file = write_statements_to_file(self.config['Processing']['original_edited_file'], statements) + self.config["Processing"]["original_edited_file"] = edited_file_path + edited_file = write_statements_to_file( + self.config["Processing"]["original_edited_file"], statements + ) - self.logger.info("...removing any existing edited original database in MySQL...") + self.logger.info( + "...removing any existing edited original database in MySQL..." + ) self.sql.drop_database(self.edited_db_name) self.logger.info("...loading edited original database into MySQL...") diff --git a/efiction/simplified.py b/efiction/simplified.py index d3b0d53..6c14de0 100644 --- a/efiction/simplified.py +++ b/efiction/simplified.py @@ -3,7 +3,12 @@ from logging import Logger from opendoors.mysql import SqlDb -from opendoors.sql_utils import group_by_table, add_create_database, parse_remove_comments, write_statements_to_file +from opendoors.sql_utils import ( + group_by_table, + add_create_database, + parse_remove_comments, + write_statements_to_file, +) from opendoors.utils import get_full_path, get_prefixed_path @@ -14,14 +19,27 @@ class EFictionSimplified: # EFiction names its tables as xxxxx_tablename and we only need to keep a few to process the archive tables_to_keep = ( - "authors", "categories", "challenges", "chapters", "characters", "classes", "classtypes", "coauthors", - "inseries", "ratings", "series", "stories", "warnings", "genres") + "authors", + "categories", + "challenges", + "chapters", + "characters", + "classes", + "classtypes", + "coauthors", + "inseries", + "ratings", + "series", + "stories", + "warnings", + "genres", + ) def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb): self.sql = sql self.config = config self.logger = logger - self.code_name = config['Archive']['code_name'] + self.code_name = config["Archive"]["code_name"] self.simplified_db_name = f"{self.code_name}_efiction_original_simplified" self.simplified_file_name = f"{self.code_name}_efiction_original_simplified.sql" @@ -35,7 +53,7 @@ def __is_table_to_keep(self, table_name: str): @staticmethod def __strip_prefix(table_name: str, statements: list): - new_name = re.sub(r'\S+_', '', table_name) + new_name = re.sub(r"\S+_", "", table_name) new_statements = [item.replace(table_name, new_name) for item in statements] return new_name, new_statements @@ -50,7 +68,18 @@ def _remove_unwanted_tables(self, statements): if self.__is_table_to_keep(k): key, value = self.__strip_prefix(k, v) grouped_statements[key] = value - elif k in ["", "lock", "unlock", "alter", "commit", "drop", "create", "use", "set", "start"]: + elif k in [ + "", + "lock", + "unlock", + "alter", + "commit", + "drop", + "create", + "use", + "set", + "start", + ]: # Ignore some side-effects of the table extraction pass else: @@ -68,15 +97,23 @@ def __simplify_and_load_statements(self, statements, 
step_path): grouped_statements = self._remove_unwanted_tables(statements) # Flatten grouped SQL statements and add CREATE DATABASE statement - output_filename = get_prefixed_path("02", step_path, f"{self.simplified_db_name}.sql") - flattened_statements = [item for sublist in list(grouped_statements.values()) for item in sublist] - final_statements = add_create_database(self.simplified_db_name, flattened_statements) + output_filename = get_prefixed_path( + "02", step_path, f"{self.simplified_db_name}.sql" + ) + flattened_statements = [ + item for sublist in list(grouped_statements.values()) for item in sublist + ] + final_statements = add_create_database( + self.simplified_db_name, flattened_statements + ) self.logger.info("...writing simplified original tables to file...") - self.config['Processing']['simplified_original_db'] = self.simplified_db_name + self.config["Processing"]["simplified_original_db"] = self.simplified_db_name step01_working_db = write_statements_to_file(output_filename, final_statements) - self.logger.info("...removing any existing simplified original database in MySQL...") + self.logger.info( + "...removing any existing simplified original database in MySQL..." + ) self.sql.drop_database(self.simplified_db_name) self.logger.info("...loading simplified original tables into MySQL...") @@ -89,8 +126,14 @@ def simplify_original_file(self, step_path) -> bool: :return: True if nothing went wrong """ self.logger.info("\nProcessing edited original eFiction database...") - with open(get_full_path(self.config['Processing']['original_edited_file']), "r", encoding="utf-8") as f: + with open( + get_full_path(self.config["Processing"]["original_edited_file"]), + "r", + encoding="utf-8", + ) as f: statements = f.read() - self.__simplify_and_load_statements(parse_remove_comments(statements), step_path) + self.__simplify_and_load_statements( + parse_remove_comments(statements), step_path + ) return True diff --git a/efiction/tag_converter.py b/efiction/tag_converter.py index e328ce8..fc078a9 100644 --- a/efiction/tag_converter.py +++ b/efiction/tag_converter.py @@ -12,8 +12,8 @@ def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb): self.logger = logger self.config = config self.sql = sql - self.working_original = self.config['Processing']['simplified_original_db'] - self.working_open_doors = self.config['Processing']['open_doors_working_db'] + self.working_original = self.config["Processing"]["simplified_original_db"] + self.working_open_doors = self.config["Processing"]["open_doors_working_db"] def check_for_nonstandard_tag_tables(self) -> bool: """ @@ -24,34 +24,47 @@ def check_for_nonstandard_tag_tables(self) -> bool: tag_tables = {} - for tag_table_name in ['rating', 'categories', 'warnings', 'classes', 'genres', 'characters']: - - if tag_table_name == 'rating': + for tag_table_name in [ + "rating", + "categories", + "warnings", + "classes", + "genres", + "characters", + ]: + if tag_table_name == "rating": # Only one rating per story, so story rating should be single number # that exactly matches rating id query = "SELECT count(*) as cnt FROM stories WHERE rid NOT IN (SELECT rid FROM ratings);" - count: List[Dict[str, int]] = self.sql.execute_and_fetchall(self.working_original, query) - tag_tables['rating'] = bool(count and count[0]['cnt'] > 0) + count: List[Dict[str, int]] = self.sql.execute_and_fetchall( + self.working_original, query + ) + tag_tables["rating"] = bool(count and count[0]["cnt"] > 0) else: # Rough check: ensure all identifiers for tag table are 
integers - if tag_table_name == 'categories': - id_name = 'catid' - elif tag_table_name == 'warnings': - id_name = 'wid' - elif tag_table_name == 'classes': - id_name = 'classes' - elif tag_table_name == 'genres': - id_name = 'gid' - elif tag_table_name == 'characters': - id_name = 'charid' + if tag_table_name == "categories": + id_name = "catid" + elif tag_table_name == "warnings": + id_name = "wid" + elif tag_table_name == "classes": + id_name = "classes" + elif tag_table_name == "genres": + id_name = "gid" + elif tag_table_name == "characters": + id_name = "charid" try: query = f"SELECT {id_name} FROM stories;" tags = self.sql.execute_and_fetchall(self.working_original, query) try: - tags = list(map(lambda story_tags: story_tags[id_name].replace(',', ''), tags)) - int(''.join(tags)) + tags = list( + map( + lambda story_tags: story_tags[id_name].replace(",", ""), + tags, + ) + ) + int("".join(tags)) tag_tables[tag_table_name] = False except Exception: # Non-integer in identifier @@ -63,18 +76,19 @@ def check_for_nonstandard_tag_tables(self) -> bool: return tag_tables - def convert_ratings(self): """ Convert the eFiction ratings table to Open Doors tags. :return: Open Doors tags with the original type "rating" """ - old_ratings, current, total = self.sql.read_table_with_total(self.working_original, "ratings") + old_ratings, current, total = self.sql.read_table_with_total( + self.working_original, "ratings" + ) for old_rating in old_ratings: new_rating = { - 'id': old_rating['rid'], - 'name': old_rating['rating'], - 'description': old_rating['warningtext'] + "id": old_rating["rid"], + "name": old_rating["rating"], + "description": old_rating["warningtext"], } query = f""" INSERT INTO tags (`original_tagid`, `original_tag`, `original_type`, `original_description`) @@ -82,22 +96,30 @@ def convert_ratings(self): """ self.sql.execute(self.working_open_doors, query) current = print_progress(current, total, "ratings converted") - return self.sql.execute_and_fetchall(self.working_open_doors, - "SELECT * FROM tags WHERE `original_type` = 'rating';") + return self.sql.execute_and_fetchall( + self.working_open_doors, + "SELECT * FROM tags WHERE `original_type` = 'rating';", + ) def convert_categories(self): """ Convert the eFiction categories table to Open Doors tags. 
:return: Open Doors tags with the original type "categories" """ - old_categories, current, total = self.sql.read_table_with_total(self.working_original, "categories") + old_categories, current, total = self.sql.read_table_with_total( + self.working_original, "categories" + ) for old_category in old_categories: - parent = [cat['category'] for cat in old_categories if cat['catid'] == old_category['parentcatid']] + parent = [ + cat["category"] + for cat in old_categories + if cat["catid"] == old_category["parentcatid"] + ] new_tag = { - 'id': old_category['catid'], - 'parent': ", ".join(parent) if parent != [] else "", - 'name': old_category['category'], - 'description': old_category['description'] + "id": old_category["catid"], + "parent": ", ".join(parent) if parent != [] else "", + "name": old_category["category"], + "description": old_category["description"], } query = f""" INSERT INTO tags @@ -106,21 +128,25 @@ def convert_categories(self): """ self.sql.execute(self.working_open_doors, query) current = print_progress(current, total, "categories converted") - return self.sql.execute_and_fetchall(self.working_open_doors, - "SELECT * FROM tags WHERE `original_type` = 'category'") + return self.sql.execute_and_fetchall( + self.working_open_doors, + "SELECT * FROM tags WHERE `original_type` = 'category'", + ) def convert_warnings(self): """ Convert the eFiction warnings table to Open Doors tags. :return: Open Doors tags with the original type "warnings" """ - old_warnings, current, total = self.sql.read_table_with_total(self.working_original, "warnings") + old_warnings, current, total = self.sql.read_table_with_total( + self.working_original, "warnings" + ) for old_warning in old_warnings: new_tag = { - 'id': old_warning['wid'], - 'parent': "", - 'name': old_warning['warning'], - 'description': old_warning['warning'] + "id": old_warning["wid"], + "parent": "", + "name": old_warning["warning"], + "description": old_warning["warning"], } query = f""" INSERT INTO tags @@ -129,22 +155,32 @@ def convert_warnings(self): """ self.sql.execute(self.working_open_doors, query) current = print_progress(current, total, "warnings converted") - return self.sql.execute_and_fetchall(self.working_open_doors, - "SELECT * FROM tags WHERE `original_type` = 'warning'") + return self.sql.execute_and_fetchall( + self.working_open_doors, + "SELECT * FROM tags WHERE `original_type` = 'warning'", + ) def convert_classes(self): """ Convert the eFiction classes table to Open Doors tags. 
:return: Open Doors tags with the original type "class" """ - old_classes, current, total = self.sql.read_table_with_total(self.working_original, "classes") - old_class_types, _, _ = self.sql.read_table_with_total(self.working_original, "classtypes") + old_classes, current, total = self.sql.read_table_with_total( + self.working_original, "classes" + ) + old_class_types, _, _ = self.sql.read_table_with_total( + self.working_original, "classtypes" + ) for old_class in old_classes: - parent = [ct['classtype_title'] for ct in old_class_types if ct['classtype_id'] == old_class['class_type']] + parent = [ + ct["classtype_title"] + for ct in old_class_types + if ct["classtype_id"] == old_class["class_type"] + ] new_tag = { - 'id': old_class['class_id'], - 'name': old_class['class_name'], - 'parent': ", ".join(parent) if parent != [] else "" + "id": old_class["class_id"], + "name": old_class["class_name"], + "parent": ", ".join(parent) if parent != [] else "", } query = f""" INSERT INTO tags @@ -153,21 +189,21 @@ def convert_classes(self): """ self.sql.execute(self.working_open_doors, query) current = print_progress(current, total, "classes converted") - return self.sql.execute_and_fetchall(self.working_open_doors, - "SELECT * FROM tags WHERE `original_type` = 'class'") + return self.sql.execute_and_fetchall( + self.working_open_doors, + "SELECT * FROM tags WHERE `original_type` = 'class'", + ) def convert_genres(self): """ Convert the eFiction genres table to Open Doors tags. :return: Open Doors tags with the original type "genre" """ - old_genres, current, total = self.sql.read_table_with_total(self.working_original, "genres") + old_genres, current, total = self.sql.read_table_with_total( + self.working_original, "genres" + ) for old_genre in old_genres: - new_tag = { - 'id': old_genre['gid'], - 'name': old_genre['genre'], - 'parent': "" - } + new_tag = {"id": old_genre["gid"], "name": old_genre["genre"], "parent": ""} query = f""" INSERT INTO tags (`original_tagid`, `original_tag`, `original_type`, `original_parent`) @@ -175,5 +211,7 @@ def convert_genres(self): """ self.sql.execute(self.working_open_doors, query) current = print_progress(current, total, "genres converted") - return self.sql.execute_and_fetchall(self.working_open_doors, - "SELECT * FROM tags WHERE `original_type` = 'genre'") + return self.sql.execute_and_fetchall( + self.working_open_doors, + "SELECT * FROM tags WHERE `original_type` = 'genre'", + ) diff --git a/efiction/tests/test_chapters.py b/efiction/tests/test_chapters.py index 48cd202..16bfd20 100644 --- a/efiction/tests/test_chapters.py +++ b/efiction/tests/test_chapters.py @@ -10,7 +10,11 @@ def get_data(): - with open(get_full_path("efiction/tests/test_data/efiction_chapters.json"), "r", encoding="utf-8") as file: + with open( + get_full_path("efiction/tests/test_data/efiction_chapters.json"), + "r", + encoding="utf-8", + ) as file: old_data_with_text = json.loads(file.read()) old_data_no_text = copy.deepcopy(old_data_with_text) @@ -19,8 +23,8 @@ def get_data(): for i, chap in enumerate(old_data_with_text): old_data_no_text[i]["storytext"] = "" - text = normalize(chap['storytext']) - if chap['endnotes']: + text = normalize(chap["storytext"]) + if chap["endnotes"]: text = text + f"\n\n\n
\n{chap['endnotes']}" chap_dict = { @@ -31,7 +35,7 @@ def get_data(): "date": None, "story_id": chap["sid"], "notes": chap["notes"], - "url": "" + "url": "", } new_data_full.append(chap_dict) @@ -47,73 +51,120 @@ def get_data(): test_config = ArchiveConfig(test_logger, "efiction", "efiction/tests/test_data").config -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def mock_sql(): - with patch('opendoors.mysql.SqlDb', autospec=True) as mock_Sql: + with patch("opendoors.mysql.SqlDb", autospec=True) as mock_Sql: sql = mock_Sql(test_config, test_logger) yield sql -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def mock_insert(mock_sql): - with patch('efiction.chapters.BigInsert', autospec=True) as mock_Insert: - insert = mock_Insert("efictiontest_working_open_doors", "chapters", ["id", "position", "title", "text", "story_id", "notes"], mock_sql) + with patch("efiction.chapters.BigInsert", autospec=True) as mock_Insert: + insert = mock_Insert( + "efictiontest_working_open_doors", + "chapters", + ["id", "position", "title", "text", "story_id", "notes"], + mock_sql, + ) yield insert -@patch('efiction.chapters.EFictionChapters.load_og_chapters_into_db') +@patch("efiction.chapters.EFictionChapters.load_og_chapters_into_db") def test_chapters_with_text_main_method(mock_method, mock_sql, mock_insert): mock_sql.read_table_to_dict.return_value = old_data_with_text - mock_sql.read_table_with_total.return_value = (old_data_with_text, 0, len(old_data_with_text)) + mock_sql.read_table_with_total.return_value = ( + old_data_with_text, + 0, + len(old_data_with_text), + ) mock_sql.execute_and_fetchall.return_value = new_data_full efiction_chapters = EFictionChapters(test_config, test_logger, mock_sql) - assert efiction_chapters.load_chapters("test_path"), "load_chapters doesn't return true" + assert efiction_chapters.load_chapters( + "test_path" + ), "load_chapters doesn't return true" mock_method.assert_called_once() def test_chapters_with_text_processing(mock_sql, mock_insert): mock_sql.read_table_to_dict.return_value = old_data_with_text - mock_sql.read_table_with_total.return_value = (old_data_with_text, 0, len(old_data_with_text)) + mock_sql.read_table_with_total.return_value = ( + old_data_with_text, + 0, + len(old_data_with_text), + ) mock_sql.execute_and_fetchall.return_value = new_data_full efiction_chapters = EFictionChapters(test_config, test_logger, mock_sql) working_chapters = efiction_chapters.load_og_chapters_into_db() - assert len(new_data_full) == len(working_chapters), "Returned chapters from method don't match the number of expected chapters" - - mock_sql.read_table_to_dict.assert_called_with("efictiontest_test_step_original_efiction_edited", "chapters") - mock_sql.read_table_with_total.assert_called_with("efictiontest_test_step_original_efiction_edited", "chapters") - mock_sql.execute_and_fetchall.assert_called_with("efictiontest_working_open_doors", "SELECT * FROM chapters;") - - assert mock_insert.addRow.call_count == len(new_data_full), "addRow calls don't match the number of expected chapters" + assert len(new_data_full) == len( + working_chapters + ), "Returned chapters from method don't match the number of expected chapters" + + mock_sql.read_table_to_dict.assert_called_with( + "efictiontest_test_step_original_efiction_edited", "chapters" + ) + mock_sql.read_table_with_total.assert_called_with( + "efictiontest_test_step_original_efiction_edited", "chapters" + ) + 
mock_sql.execute_and_fetchall.assert_called_with( + "efictiontest_working_open_doors", "SELECT * FROM chapters;" + ) + + assert mock_insert.addRow.call_count == len( + new_data_full + ), "addRow calls don't match the number of expected chapters" mock_insert.send.assert_called_once() -@patch('efiction.chapters.EFictionChapters.load_chapter_text_into_db') +@patch("efiction.chapters.EFictionChapters.load_chapter_text_into_db") def test_chapters_no_text_main_method(mock_method, mock_sql, mock_insert): mock_sql.read_table_to_dict.return_value = old_data_no_text - mock_sql.read_table_with_total.return_value = (old_data_no_text, 0, len(old_data_no_text)) + mock_sql.read_table_with_total.return_value = ( + old_data_no_text, + 0, + len(old_data_no_text), + ) mock_sql.execute_and_fetchall.return_value = new_data_missing efiction_chapters = EFictionChapters(test_config, test_logger, mock_sql) - assert efiction_chapters.load_chapters("test_path"), "load_chapters doesn't return true" + assert efiction_chapters.load_chapters( + "test_path" + ), "load_chapters doesn't return true" mock_method.assert_called_once() def test_chapters_no_text_processing(monkeypatch, mock_sql, mock_insert): mock_sql.read_table_to_dict.return_value = old_data_no_text - mock_sql.read_table_with_total.return_value = (old_data_no_text, 0, len(old_data_no_text)) + mock_sql.read_table_with_total.return_value = ( + old_data_no_text, + 0, + len(old_data_no_text), + ) mock_sql.execute_and_fetchall.return_value = new_data_missing - monkeypatch.setattr('builtins.input', lambda _: "YES, DO AS I SAY!") + monkeypatch.setattr("builtins.input", lambda _: "YES, DO AS I SAY!") efiction_chapters = EFictionChapters(test_config, test_logger, mock_sql) - working_chapters = efiction_chapters.load_chapter_text_into_db(efiction_chapters.list_chapter_files()) - assert len(new_data_missing) == len(working_chapters), "Returned chapters from method don't match the number of expected chapters" - - mock_sql.read_table_to_dict.assert_called_with("efictiontest_test_step_original_efiction_edited", "chapters") - mock_sql.read_table_with_total.assert_called_with("efictiontest_test_step_original_efiction_edited", "chapters") - mock_sql.execute_and_fetchall.assert_called_with("efictiontest_working_open_doors", "SELECT * FROM chapters;") - - assert mock_insert.addRow.call_count == len(new_data_missing), "addRow calls don't match the number of expected chapters" + working_chapters = efiction_chapters.load_chapter_text_into_db( + efiction_chapters.list_chapter_files() + ) + assert len(new_data_missing) == len( + working_chapters + ), "Returned chapters from method don't match the number of expected chapters" + + mock_sql.read_table_to_dict.assert_called_with( + "efictiontest_test_step_original_efiction_edited", "chapters" + ) + mock_sql.read_table_with_total.assert_called_with( + "efictiontest_test_step_original_efiction_edited", "chapters" + ) + mock_sql.execute_and_fetchall.assert_called_with( + "efictiontest_working_open_doors", "SELECT * FROM chapters;" + ) + + assert mock_insert.addRow.call_count == len( + new_data_missing + ), "addRow calls don't match the number of expected chapters" mock_insert.send.assert_called_once() diff --git a/efiction/tests/test_metadata.py b/efiction/tests/test_metadata.py index d6e7f30..13a2dab 100644 --- a/efiction/tests/test_metadata.py +++ b/efiction/tests/test_metadata.py @@ -11,61 +11,176 @@ class TestEFictionConverter(TestCase): # Before and after steps and utility methods def setUp(self) -> None: - """ Load test data and 
create the Open Doors tables for the normal eFiction tests """ + """Load test data and create the Open Doors tables for the normal eFiction tests""" load_fixtures(self.efiction_converter.config, self.efiction_converter.sql) self.efiction_converter.create_open_doors_db("test_path") def tearDown(self) -> None: - """ Remove files created during the tests """ - remove_output_files('efiction/tests/test_output') - remove_output_files('test_path') + """Remove files created during the tests""" + remove_output_files("efiction/tests/test_output") + remove_output_files("test_path") # Tests def test_for_coauthor_none(self): - """ Test checking for coauthors where no coauthors are present """ + """Test checking for coauthors where no coauthors are present""" fake_story = {"id": 2} assert not self.efiction_converter.fetch_coauthors(fake_story) def test_for_coauthor_existing(self): - """ Test checking for coauthor where there is a coauthor """ + """Test checking for coauthor where there is a coauthor""" # Assert list > 0 fake_story = {"id": 1} assert self.efiction_converter.fetch_coauthors(fake_story) def test_convert_authors(self): old_authors = [ - {'uid': 1, 'penname': 'Author1', 'realname': 'Author1', 'email': 'A1@example.com', 'website': '', 'bio': '', - 'image': '', 'date': datetime.datetime(2006, 1, 6, 1, 2, 13), 'admincreated': '0', 'password': 'xfghtu'}, - {'uid': 2, 'penname': 'B Author 2', 'realname': 'B Author 2', 'email': 'B2@example.com', 'website': '', - 'bio': '', 'image': 'bauthor2', 'date': datetime.datetime(2006, 2, 9, 1, 37, 24), 'admincreated': '1', - 'password': 'xfghtu'}, - {'uid': 3, 'penname': 'C Author 3', 'realname': 'C Author 3', 'email': 'C3@example.com', - 'website': 'http://example.com', 'bio': 'An author bio with some text in it', 'image': '', - 'date': datetime.datetime(2006, 2, 16, 22, 58, 2), 'admincreated': '1', 'password': 'xfghtu'}, - {'uid': 4, 'penname': 'D Author 4', 'realname': 'D Author 4', 'email': 'D4@example.com', 'website': '', - 'bio': '', 'image': '', 'date': datetime.datetime(2006, 2, 15, 23, 0), 'admincreated': '1', - 'password': 'xfghtu'}, - {'uid': 5, 'penname': 'E Author 5', 'realname': 'E Author 5', 'email': 'E5@example.com', - 'website': 'www.example.com', 'bio': '', 'image': 'eauthor5', - 'date': datetime.datetime(2006, 2, 16, 23, 0), 'admincreated': '1', 'password': 'xfghtu'}] + { + "uid": 1, + "penname": "Author1", + "realname": "Author1", + "email": "A1@example.com", + "website": "", + "bio": "", + "image": "", + "date": datetime.datetime(2006, 1, 6, 1, 2, 13), + "admincreated": "0", + "password": "xfghtu", + }, + { + "uid": 2, + "penname": "B Author 2", + "realname": "B Author 2", + "email": "B2@example.com", + "website": "", + "bio": "", + "image": "bauthor2", + "date": datetime.datetime(2006, 2, 9, 1, 37, 24), + "admincreated": "1", + "password": "xfghtu", + }, + { + "uid": 3, + "penname": "C Author 3", + "realname": "C Author 3", + "email": "C3@example.com", + "website": "http://example.com", + "bio": "An author bio with some text in it", + "image": "", + "date": datetime.datetime(2006, 2, 16, 22, 58, 2), + "admincreated": "1", + "password": "xfghtu", + }, + { + "uid": 4, + "penname": "D Author 4", + "realname": "D Author 4", + "email": "D4@example.com", + "website": "", + "bio": "", + "image": "", + "date": datetime.datetime(2006, 2, 15, 23, 0), + "admincreated": "1", + "password": "xfghtu", + }, + { + "uid": 5, + "penname": "E Author 5", + "realname": "E Author 5", + "email": "E5@example.com", + "website": "www.example.com", + "bio": "", + 
"image": "eauthor5", + "date": datetime.datetime(2006, 2, 16, 23, 0), + "admincreated": "1", + "password": "xfghtu", + }, + ] authors = self.efiction_converter._convert_authors(old_authors) self.assertEqual(5, len(authors), "there should be 5 authors") def test_convert_characters(self): - old_characters = [{'charid': 1, 'catid': -1, 'charname': "Bill O'Connell", 'bio': '', 'image': ''}, - {'charid': 2, 'catid': -1, 'charname': 'Bob Billson', 'bio': '', 'image': ''}, - {'charid': 3, 'catid': -1, 'charname': 'Fatima Habibi', 'bio': '', 'image': ''}, - {'charid': 4, 'catid': -1, 'charname': 'Václav', 'bio': '', 'image': ''}, - {'charid': 5, 'catid': -1, 'charname': 'Spyros Papadopoulos', 'bio': '', 'image': ''}, - {'charid': 6, 'catid': -1, 'charname': 'Olu Adebayo', 'bio': '', 'image': ''}, - {'charid': 7, 'catid': -1, 'charname': 'Samia Ben Abdel', 'bio': '', 'image': ''}, - {'charid': 8, 'catid': -1, 'charname': 'Einar Rønquist', 'bio': '', 'image': ''}, - {'charid': 9, 'catid': -1, 'charname': 'Aisha Johnson', 'bio': '', 'image': ''}, - {'charid': 10, 'catid': -1, 'charname': 'Mikhael Antonov', 'bio': '', 'image': ''}, - {'charid': 11, 'catid': -1, 'charname': 'Liam Habibi', 'bio': '', 'image': ''}, - {'charid': 12, 'catid': -1, 'charname': 'Bernard', 'bio': '', 'image': ''}, - {'charid': 13, 'catid': -1, 'charname': 'Vincent Corentin', 'bio': '', 'image': ''}, - {'charid': 14, 'catid': -1, 'charname': 'Other', 'bio': '', 'image': ''}] + old_characters = [ + { + "charid": 1, + "catid": -1, + "charname": "Bill O'Connell", + "bio": "", + "image": "", + }, + { + "charid": 2, + "catid": -1, + "charname": "Bob Billson", + "bio": "", + "image": "", + }, + { + "charid": 3, + "catid": -1, + "charname": "Fatima Habibi", + "bio": "", + "image": "", + }, + {"charid": 4, "catid": -1, "charname": "Václav", "bio": "", "image": ""}, + { + "charid": 5, + "catid": -1, + "charname": "Spyros Papadopoulos", + "bio": "", + "image": "", + }, + { + "charid": 6, + "catid": -1, + "charname": "Olu Adebayo", + "bio": "", + "image": "", + }, + { + "charid": 7, + "catid": -1, + "charname": "Samia Ben Abdel", + "bio": "", + "image": "", + }, + { + "charid": 8, + "catid": -1, + "charname": "Einar Rønquist", + "bio": "", + "image": "", + }, + { + "charid": 9, + "catid": -1, + "charname": "Aisha Johnson", + "bio": "", + "image": "", + }, + { + "charid": 10, + "catid": -1, + "charname": "Mikhael Antonov", + "bio": "", + "image": "", + }, + { + "charid": 11, + "catid": -1, + "charname": "Liam Habibi", + "bio": "", + "image": "", + }, + {"charid": 12, "catid": -1, "charname": "Bernard", "bio": "", "image": ""}, + { + "charid": 13, + "catid": -1, + "charname": "Vincent Corentin", + "bio": "", + "image": "", + }, + {"charid": 14, "catid": -1, "charname": "Other", "bio": "", "image": ""}, + ] characters = self.efiction_converter._convert_characters(old_characters) self.assertEqual(14, len(characters), "there should be 14 characters") @@ -75,18 +190,40 @@ def test_convert_story_tags_normal_ratings(self): """ self.efiction_converter.convert_all_tags() old_stories = [ - {'sid': 1, 'title': 'Bacon ipsum', 'summary': ' 
Meat-related text.', 'storynotes': None, - 'catid': '1', 'classes': '3,10,16,26', 'charid': '2,1', 'rid': '3', - 'date': datetime.datetime(2006, 2, 9, 22, 21, 35), 'updated': datetime.datetime(2006, 2, 9, 22, 21, 35), - 'uid': 2, 'coauthors': None, 'featured': '', 'validated': '1', 'completed': '1', 'rr': '', - 'wordcount': 3992, 'rating': 0, 'reviews': 2, 'count': 2872, 'challenges': '0'}] + { + "sid": 1, + "title": "Bacon ipsum", + "summary": " 
Meat-related text.", + "storynotes": None, + "catid": "1", + "classes": "3,10,16,26", + "charid": "2,1", + "rid": "3", + "date": datetime.datetime(2006, 2, 9, 22, 21, 35), + "updated": datetime.datetime(2006, 2, 9, 22, 21, 35), + "uid": 2, + "coauthors": None, + "featured": "", + "validated": "1", + "completed": "1", + "rr": "", + "wordcount": 3992, + "rating": 0, + "reviews": 2, + "count": 2872, + "challenges": "0", + } + ] result = self.efiction_converter._convert_story_tags(old_stories[0]) - self.assertEqual({ - 'categories': [6], - 'characters': [106, 107], - 'classes': [70, 77, 83, 94], - 'rating': [3] - }, result) + self.assertEqual( + { + "categories": [6], + "characters": [106, 107], + "classes": [70, 77, 83, 94], + "rating": [3], + }, + result, + ) def test_convert_story_tags_string_ratings(self): """ @@ -94,323 +231,398 @@ def test_convert_story_tags_string_ratings(self): key) ratings. """ # Set up test data with string ratings - efiction_converter_string_ratings = create_efiction_converter("efiction_string_ratings") + efiction_converter_string_ratings = create_efiction_converter( + "efiction_string_ratings" + ) # Create databases for this test - load_fixtures(efiction_converter_string_ratings.config, efiction_converter_string_ratings.sql) + load_fixtures( + efiction_converter_string_ratings.config, + efiction_converter_string_ratings.sql, + ) efiction_converter_string_ratings.create_open_doors_db("test_path") # Now convert the tags and set ratings_nonstandard efiction_converter_string_ratings.convert_all_tags() old_stories = [ - {'sid': 1, 'title': 'Bacon ipsum', 'summary': ' 
Meat-related text.', 'storynotes': None, - 'catid': '1', 'classes': '3,10,16,26', 'charid': '2,1', 'rid': 'Teen', - 'date': datetime.datetime(2006, 2, 9, 22, 21, 35), 'updated': datetime.datetime(2006, 2, 9, 22, 21, 35), - 'uid': 2, 'coauthors': None, 'featured': '', 'validated': '1', 'completed': '1', 'rr': '', - 'wordcount': 3992, 'rating': 0, 'reviews': 2, 'count': 2872, 'challenges': '0'}] + { + "sid": 1, + "title": "Bacon ipsum", + "summary": " 
Meat-related text.", + "storynotes": None, + "catid": "1", + "classes": "3,10,16,26", + "charid": "2,1", + "rid": "Teen", + "date": datetime.datetime(2006, 2, 9, 22, 21, 35), + "updated": datetime.datetime(2006, 2, 9, 22, 21, 35), + "uid": 2, + "coauthors": None, + "featured": "", + "validated": "1", + "completed": "1", + "rr": "", + "wordcount": 3992, + "rating": 0, + "reviews": 2, + "count": 2872, + "challenges": "0", + } + ] result = efiction_converter_string_ratings._convert_story_tags(old_stories[0]) - self.assertEqual({'categories': [6], 'characters': [8], 'classes': [], 'rating': [3]}, result) + self.assertEqual( + {"categories": [6], "characters": [8], "classes": [], "rating": [3]}, result + ) # Remove test files created during this test - remove_output_files('efiction/tests/test_output') + remove_output_files("efiction/tests/test_output") def test_convert_stories(self): self.efiction_converter.convert_all_tags() - result = self.efiction_converter.convert_stories('de') + result = self.efiction_converter.convert_stories("de") self.assertEqual(15, len(result)) self.assertEqual( - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 2, 9, 22, 21, 35), - 'do_not_import': 0, - 'fandoms': '', - 'id': 1, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': '
Meat-related text.', - 'tags': '', - 'title': 'Bacon ipsum', - 'updated': datetime.datetime(2006, 2, 9, 22, 21, 35), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, result[0], "Entities should be unencoded and leading and trailing spaces stripped") + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 2, 9, 22, 21, 35), + "do_not_import": 0, + "fandoms": "", + "id": 1, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "
Meat-related text.", + "tags": "", + "title": "Bacon ipsum", + "updated": datetime.datetime(2006, 2, 9, 22, 21, 35), + "language_code": "de", + "url": None, + "warnings": "", + }, + result[0], + "Entities should be unencoded and leading and trailing spaces stripped", + ) self.assertEqual( - [{'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 4, 13, 0, 45), - 'do_not_import': 0, - 'fandoms': '', - 'id': 3, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Short, and no tricky characters.', - 'tags': '', - 'title': 'Lorem ipsum', - 'updated': datetime.datetime(2006, 3, 4, 13, 0, 45), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 4, 13, 16, 12), - 'do_not_import': 0, - 'fandoms': '', - 'id': 4, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Email-related story.', - 'tags': '', - 'title': 'Email story', - 'updated': datetime.datetime(2006, 3, 4, 13, 16, 12), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 5, 17, 12, 16), - 'do_not_import': 0, - 'fandoms': '', - 'id': 50, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Meow all night chew iPad power cord.', - 'tags': '', - 'title': 'Cat-related ipsum', - 'updated': datetime.datetime(2006, 3, 5, 17, 12, 16), - 'language_code': 'de', - 'url': None, - 'warnings': ''}], result[1:4]) + [ + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 4, 13, 0, 45), + "do_not_import": 0, + "fandoms": "", + "id": 3, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Short, and no tricky characters.", + "tags": "", + "title": "Lorem ipsum", + "updated": datetime.datetime(2006, 3, 4, 13, 0, 45), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 4, 13, 16, 12), + "do_not_import": 0, + "fandoms": "", + "id": 4, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Email-related story.", + "tags": "", + "title": "Email story", + "updated": datetime.datetime(2006, 3, 4, 13, 16, 12), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 5, 17, 12, 16), + "do_not_import": 0, + "fandoms": "", + "id": 50, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Meow all night chew iPad power cord.", + "tags": "", + "title": "Cat-related ipsum", + "updated": datetime.datetime(2006, 3, 5, 17, 12, 16), + "language_code": "de", + "url": None, + "warnings": "", + }, + ], + result[1:4], + ) self.assertEqual( - [{'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 5, 17, 20, 38), - 'do_not_import': 0, - 'fandoms': '', - 'id': 51, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Biscuit candy cake candy macaroon. Soufflé marzipan croissant ' - 'gummi bears. Wafer lollipop tart topping. 
Bonbon danish dragée ' - 'lemon drops lemon drops caramels jelly. Tootsie roll chocolate ' - 'cookie cake. Topping cheesecake lollipop halvah jujubes brownie ' - 'bear claw.', - 'tags': '', - 'title': 'Cupcake ipsum', - 'updated': datetime.datetime(2006, 3, 5, 17, 20, 38), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 18, 12, 56, 43), - 'do_not_import': 0, - 'fandoms': '', - 'id': 835, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Eôs in ipsum ocûrrëret.', - 'tags': '', - 'title': 'Windows 1252 Story', - 'updated': datetime.datetime(2006, 3, 18, 12, 56, 43), - 'language_code': 'de', - 'url': None, - 'warnings': ''}], [result[4], result[8]], "Unicode characters should be normalized") + [ + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 5, 17, 20, 38), + "do_not_import": 0, + "fandoms": "", + "id": 51, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Biscuit candy cake candy macaroon. Soufflé marzipan croissant " + "gummi bears. Wafer lollipop tart topping. Bonbon danish dragée " + "lemon drops lemon drops caramels jelly. Tootsie roll chocolate " + "cookie cake. Topping cheesecake lollipop halvah jujubes brownie " + "bear claw.", + "tags": "", + "title": "Cupcake ipsum", + "updated": datetime.datetime(2006, 3, 5, 17, 20, 38), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 18, 12, 56, 43), + "do_not_import": 0, + "fandoms": "", + "id": 835, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Eôs in ipsum ocûrrëret.", + "tags": "", + "title": "Windows 1252 Story", + "updated": datetime.datetime(2006, 3, 18, 12, 56, 43), + "language_code": "de", + "url": None, + "warnings": "", + }, + ], + [result[4], result[8]], + "Unicode characters should be normalized", + ) self.assertEqual( - [{'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 5, 17, 27, 5), - 'do_not_import': 0, - 'fandoms': '', - 'id': 54, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Only shorter.', - 'tags': '', - 'title': 'Carl Sagan ipsum', - 'updated': datetime.datetime(2006, 3, 5, 17, 27, 5), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 6, 15, 42, 57), - 'do_not_import': 0, - 'fandoms': '', - 'id': 108, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Lots and lots of cakes.', - 'tags': '', - 'title': 'A lot of cakes', - 'updated': datetime.datetime(2006, 3, 6, 15, 42, 57), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 17, 15, 26, 36), - 'do_not_import': 0, - 'fandoms': '', - 'id': 741, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'This is a story containing only a link to another location.', - 'tags': '', - 'title': 'Actually a bookmark', - 'updated': datetime.datetime(2006, 3, 17, 15, 26, 36), - 'language_code': 'de', - 'url': 
None, - 'warnings': ''}], result[5:8]) + [ + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 5, 17, 27, 5), + "do_not_import": 0, + "fandoms": "", + "id": 54, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Only shorter.", + "tags": "", + "title": "Carl Sagan ipsum", + "updated": datetime.datetime(2006, 3, 5, 17, 27, 5), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 6, 15, 42, 57), + "do_not_import": 0, + "fandoms": "", + "id": 108, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Lots and lots of cakes.", + "tags": "", + "title": "A lot of cakes", + "updated": datetime.datetime(2006, 3, 6, 15, 42, 57), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 17, 15, 26, 36), + "do_not_import": 0, + "fandoms": "", + "id": 741, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "This is a story containing only a link to another location.", + "tags": "", + "title": "Actually a bookmark", + "updated": datetime.datetime(2006, 3, 17, 15, 26, 36), + "language_code": "de", + "url": None, + "warnings": "", + }, + ], + result[5:8], + ) self.assertEqual( - [{'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2006, 3, 18, 13, 42, 27), - 'do_not_import': 0, - 'fandoms': '', - 'id': 838, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'Things happen.', - 'tags': '', - 'title': 'Another story in series', - 'updated': datetime.datetime(2006, 3, 18, 13, 42, 27), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2008, 2, 11, 13, 32, 43), - 'do_not_import': 0, - 'fandoms': '', - 'id': 3519, - 'import_notes': '', - 'imported': 0, - 'notes': "Written for someone's birthday as a small thank you for all their " - 'hard work and dedication here on Efiction Test archive and the ' - "Testing Solutions website. 
Moderator, you're a star!", - 'rating': None, - 'relationships': '', - 'summary': 'More vegetables.', - 'tags': '', - 'title': 'Beans and other vegetables', - 'updated': datetime.datetime(2008, 2, 11, 13, 33, 2), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2008, 10, 8, 20, 58, 26), - 'do_not_import': 0, - 'fandoms': '', - 'id': 3721, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': "Database is Latin-1 and doesn't support Japanese text.", - 'tags': '', - 'title': 'Japanese', - 'updated': datetime.datetime(2008, 10, 8, 20, 58, 29), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2008, 11, 28, 14, 10, 56), - 'do_not_import': 0, - 'fandoms': '', - 'id': 3745, - 'import_notes': '', - 'imported': 0, - 'notes': '', - 'rating': None, - 'relationships': '', - 'summary': 'A nice little summary.', - 'tags': '', - 'title': 'Accented lorem ipsum', - 'updated': datetime.datetime(2008, 11, 28, 14, 10, 59), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2008, 12, 27, 7, 18, 6), - 'do_not_import': 0, - 'fandoms': '', - 'id': 3785, - 'import_notes': '', - 'imported': 0, - 'notes': 'Some story notes about Zombies.', - 'rating': None, - 'relationships': '', - 'summary': 'Zombie-related lorem ipsum.', - 'tags': '', - 'title': 'Zombies', - 'updated': datetime.datetime(2008, 12, 27, 7, 18, 9), - 'language_code': 'de', - 'url': None, - 'warnings': ''}, - {'ao3_url': None, - 'categories': None, - 'characters': '', - 'date': datetime.datetime(2010, 1, 3, 10, 4, 12), - 'do_not_import': 0, - 'fandoms': '', - 'id': 4035, - 'import_notes': '', - 'imported': 0, - 'notes': 'Thanks to betas.', - 'rating': None, - 'relationships': '', - 'summary': 'Bushwick man braid vaporware hot chicken yuccie snackwave ' - 'cold-pressed +1 3 wolf moon.', - 'tags': '', - 'title': 'Hipster ipsum', - 'updated': datetime.datetime(2010, 1, 3, 10, 4, 16), - 'language_code': 'de', - 'url': None, - 'warnings': ''}], - result[9:]) + [ + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2006, 3, 18, 13, 42, 27), + "do_not_import": 0, + "fandoms": "", + "id": 838, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Things happen.", + "tags": "", + "title": "Another story in series", + "updated": datetime.datetime(2006, 3, 18, 13, 42, 27), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2008, 2, 11, 13, 32, 43), + "do_not_import": 0, + "fandoms": "", + "id": 3519, + "import_notes": "", + "imported": 0, + "notes": "Written for someone's birthday as a small thank you for all their " + "hard work and dedication here on Efiction Test archive and the " + "Testing Solutions website. 
Moderator, you're a star!", + "rating": None, + "relationships": "", + "summary": "More vegetables.", + "tags": "", + "title": "Beans and other vegetables", + "updated": datetime.datetime(2008, 2, 11, 13, 33, 2), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2008, 10, 8, 20, 58, 26), + "do_not_import": 0, + "fandoms": "", + "id": 3721, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "Database is Latin-1 and doesn't support Japanese text.", + "tags": "", + "title": "Japanese", + "updated": datetime.datetime(2008, 10, 8, 20, 58, 29), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2008, 11, 28, 14, 10, 56), + "do_not_import": 0, + "fandoms": "", + "id": 3745, + "import_notes": "", + "imported": 0, + "notes": "", + "rating": None, + "relationships": "", + "summary": "A nice little summary.", + "tags": "", + "title": "Accented lorem ipsum", + "updated": datetime.datetime(2008, 11, 28, 14, 10, 59), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2008, 12, 27, 7, 18, 6), + "do_not_import": 0, + "fandoms": "", + "id": 3785, + "import_notes": "", + "imported": 0, + "notes": "Some story notes about Zombies.", + "rating": None, + "relationships": "", + "summary": "Zombie-related lorem ipsum.", + "tags": "", + "title": "Zombies", + "updated": datetime.datetime(2008, 12, 27, 7, 18, 9), + "language_code": "de", + "url": None, + "warnings": "", + }, + { + "ao3_url": None, + "categories": None, + "characters": "", + "date": datetime.datetime(2010, 1, 3, 10, 4, 12), + "do_not_import": 0, + "fandoms": "", + "id": 4035, + "import_notes": "", + "imported": 0, + "notes": "Thanks to betas.", + "rating": None, + "relationships": "", + "summary": "Bushwick man braid vaporware hot chicken yuccie snackwave " + "cold-pressed +1 3 wolf moon.", + "tags": "", + "title": "Hipster ipsum", + "updated": datetime.datetime(2010, 1, 3, 10, 4, 16), + "language_code": "de", + "url": None, + "warnings": "", + }, + ], + result[9:], + ) diff --git a/efiction/tests/test_original.py b/efiction/tests/test_original.py index 761228b..0c705ad 100644 --- a/efiction/tests/test_original.py +++ b/efiction/tests/test_original.py @@ -8,48 +8,65 @@ test_logger = MagicMock() test_sql = MagicMock() -test_config = ArchiveConfig(MagicMock(), "efiction_no_defs", "efiction/tests/test_data").config +test_config = ArchiveConfig( + MagicMock(), "efiction_no_defs", "efiction/tests/test_data" +).config class TestOriginal(TestCase): - test_config_no_defs = ArchiveConfig(test_logger, "efiction_no_defs", "efiction/tests/test_data").config + test_config_no_defs = ArchiveConfig( + test_logger, "efiction_no_defs", "efiction/tests/test_data" + ).config efiction_with_defs = EFictionOriginal(test_config, test_logger, test_sql) efiction_no_defs = EFictionOriginal(test_config_no_defs, test_logger, test_sql) def tearDown(self) -> None: - """ Remove files created during the tests """ - remove_output_files('efiction/tests/test_output/*') + """Remove files created during the tests""" + remove_output_files("efiction/tests/test_output/*") - @patch('builtins.input', lambda *args: 'efiction/tests/test_data/efiction.sql') - @patch('efiction.original.add_create_database') + 
@patch("builtins.input", lambda *args: "efiction/tests/test_data/efiction.sql") + @patch("efiction.original.add_create_database") def test_load_original_file(self, mock_add_create_database): self.efiction_with_defs.load_original_file("efiction/tests/test_output") mock_add_create_database.assert_called_once() - test_sql.load_sql_file_into_db.arg_should_contain("efictiontestnodefs_efiction_original_edited.sql") + test_sql.load_sql_file_into_db.arg_should_contain( + "efictiontestnodefs_efiction_original_edited.sql" + ) def test_check_for_table_defs_no_defs(self): statements = group_by_table( - ['INSERT INTO fanfiction_authorinfo VALUES ("0","2","http://example.com")', - 'INSERT INTO fanfiction_authorprefs VALUES ("5","BillBob","2","0","1","77")', - 'INSERT INTO fanfiction_authors VALUES ("1","Author1","Author1","A1@example.com")'] + [ + 'INSERT INTO fanfiction_authorinfo VALUES ("0","2","http://example.com")', + 'INSERT INTO fanfiction_authorprefs VALUES ("5","BillBob","2","0","1","77")', + 'INSERT INTO fanfiction_authors VALUES ("1","Author1","Author1","A1@example.com")', + ] ) has_defs = EFictionOriginal._contains_table_defs(statements) self.assertFalse(has_defs) def test_check_for_table_defs_with_defs(self): statements = group_by_table( - ['CREATE TABLE fanfiction_authorinfo (`uid` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(200))', - 'INSERT INTO fanfiction_authorinfo VALUES ("5","BillBob","2","0","1","77")', - 'INSERT INTO fanfiction_authors VALUES ("1","Author1","Author1","A1@example.com")'] + [ + "CREATE TABLE fanfiction_authorinfo (`uid` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(200))", + 'INSERT INTO fanfiction_authorinfo VALUES ("5","BillBob","2","0","1","77")', + 'INSERT INTO fanfiction_authors VALUES ("1","Author1","Author1","A1@example.com")', + ] ) has_defs = self.efiction_with_defs._contains_table_defs(statements) self.assertTrue(has_defs) def test_add_table_definitions(self): - statements = ['INSERT INTO fanfiction_authorinfo VALUES ("0","2","http://example.com")', - "INSERT INTO fanfiction_stories VALUES ('1', 'thing');"] + statements = [ + 'INSERT INTO fanfiction_authorinfo VALUES ("0","2","http://example.com")', + "INSERT INTO fanfiction_stories VALUES ('1', 'thing');", + ] result = self.efiction_no_defs._add_table_definitions(statements) - self.assertEqual("DROP TABLE IF EXISTS `fanfiction_stories`;", result[3].strip(), - "should generate DROP TABLE statements for the given tables") - self.assertTrue(result[4].strip().startswith("CREATE TABLE `fanfiction_stories`"), - "should generate CREATE TABLE statements for the given tables") + self.assertEqual( + "DROP TABLE IF EXISTS `fanfiction_stories`;", + result[3].strip(), + "should generate DROP TABLE statements for the given tables", + ) + self.assertTrue( + result[4].strip().startswith("CREATE TABLE `fanfiction_stories`"), + "should generate CREATE TABLE statements for the given tables", + ) diff --git a/efiction/tests/test_simplified.py b/efiction/tests/test_simplified.py index 108b426..b41e2fd 100644 --- a/efiction/tests/test_simplified.py +++ b/efiction/tests/test_simplified.py @@ -10,13 +10,29 @@ class TestEFictionSimplified(TestCase): - test_config_no_defs = ArchiveConfig(test_logger, "efiction_no_defs", "efiction/tests/test_data").config - efiction_with_defs = simplified.EFictionSimplified(test_config, test_logger, test_sql) - efiction_no_defs = simplified.EFictionSimplified(test_config_no_defs, test_logger, test_sql) + test_config_no_defs = ArchiveConfig( + test_logger, "efiction_no_defs", 
"efiction/tests/test_data" + ).config + efiction_with_defs = simplified.EFictionSimplified( + test_config, test_logger, test_sql + ) + efiction_no_defs = simplified.EFictionSimplified( + test_config_no_defs, test_logger, test_sql + ) def test_remove_unwanted_tables(self): result = self.efiction_no_defs._remove_unwanted_tables( - ["use database;", "create table fanfiction_stats;", "create table fanfiction_stories;"]) - self.assertTrue('fanfiction_stats' not in result, "statements on unwanted tables should be removed") - self.assertTrue('stories' in result, "statements on desired tables should be stripped of their prefix and kept") - + [ + "use database;", + "create table fanfiction_stats;", + "create table fanfiction_stories;", + ] + ) + self.assertTrue( + "fanfiction_stats" not in result, + "statements on unwanted tables should be removed", + ) + self.assertTrue( + "stories" in result, + "statements on desired tables should be stripped of their prefix and kept", + ) diff --git a/efiction/tests/test_utils.py b/efiction/tests/test_utils.py index 3856a80..2b1b52f 100644 --- a/efiction/tests/test_utils.py +++ b/efiction/tests/test_utils.py @@ -13,19 +13,25 @@ def create_efiction_converter(ini_file_name: str): :return: an EFictionConverter with the desired configuration """ test_logger = MagicMock() - test_config = ArchiveConfig(test_logger, ini_file_name, "efiction/tests/test_data").config + test_config = ArchiveConfig( + test_logger, ini_file_name, "efiction/tests/test_data" + ).config test_sql = SqlDb(test_config, test_logger) - test_config['Processing']['working_dir'] = get_full_path("efiction/tests/test_output") + test_config["Processing"]["working_dir"] = get_full_path( + "efiction/tests/test_output" + ) return EFictionMetadata(test_config, test_logger, test_sql, "test_path") def load_fixtures(test_config, test_sql): dbs = [ - test_config['Processing']['open_doors_working_db'], - test_config['Processing']['simplified_original_db'], + test_config["Processing"]["open_doors_working_db"], + test_config["Processing"]["simplified_original_db"], ] for db in dbs: cursor = test_sql.conn.cursor() cursor.execute(f"DROP DATABASE IF EXISTS {db.strip()};") test_sql.conn.commit() - test_sql.load_sql_file_into_db(get_full_path(test_config['Processing']['original_tidied_file'])) + test_sql.load_sql_file_into_db( + get_full_path(test_config["Processing"]["original_tidied_file"]) + ) diff --git a/opendoors/big_insert.py b/opendoors/big_insert.py index 6daed15..9fb92fb 100644 --- a/opendoors/big_insert.py +++ b/opendoors/big_insert.py @@ -2,13 +2,14 @@ from tempfile import NamedTemporaryFile from os import unlink + class BigInsert: def __init__(self, database: str, table_name: str, columns: list, sql: SqlDb): """ Handles `Load data local infile` inserts - :param database: Name of database to use - :param table_name: Name of table to insert to - :param columns: list of column names, the order of those is used in addRow() + :param database: Name of database to use + :param table_name: Name of table to insert to + :param columns: list of column names, the order of those is used in addRow() method :param sql: A connection to mysql to use """ @@ -16,7 +17,9 @@ def __init__(self, database: str, table_name: str, columns: list, sql: SqlDb): self._sql.ensure_local_infile() self._database = database self._columns = columns - self._tempfile = NamedTemporaryFile("w+", delete=False, suffix=".otw.tmp", encoding="utf-8") + self._tempfile = NamedTemporaryFile( + "w+", delete=False, suffix=".otw.tmp", encoding="utf-8" + ) # 
convert windows slashes to unix because windows needs it like that? windows_friendly_name = self._tempfile.name.replace("\\", "/") # We are storing data as a `tab separated` file @@ -26,7 +29,7 @@ def __init__(self, database: str, table_name: str, columns: list, sql: SqlDb): # # However we need to preserve newlines because they will eventually get # cleaned up into proper tags, hence we will escape them into proper \n - self._query = fr""" + self._query = rf""" LOAD DATA LOCAL INFILE '{windows_friendly_name}' INTO TABLE {table_name} FIELDS TERMINATED BY '\t' ESCAPED BY '\\' @@ -46,7 +49,10 @@ def addRow(self, *args): if len(args) != len(self._columns): raise Exception("Number of arguments does not equal number of columns!") # remove tabs and escape newlines - values = [x.replace("\t", " ").replace("\n", "\\n") if isinstance(x, str) else str(x) for x in args] + values = [ + x.replace("\t", " ").replace("\n", "\\n") if isinstance(x, str) else str(x) + for x in args + ] line = "\t".join(values) + "\n" self._tempfile.write(line) @@ -60,5 +66,3 @@ def send(self): # delete file unlink(self._tempfile.name) self._tempfile = None - - diff --git a/opendoors/config.py b/opendoors/config.py index 67d522d..d5a6439 100644 --- a/opendoors/config.py +++ b/opendoors/config.py @@ -12,6 +12,7 @@ class ArchiveConfig: """ Wrapper for ConfigParser to provide convenience methods for configuration """ + def __init__(self, logger: Logger, code_name: str, working_dir: str): self.logger = logger self.working_dir = working_dir @@ -32,30 +33,34 @@ def _new_config_file(self): # Write out the new config file with open(self.config_path, "w") as configfile: self.config.write(configfile) - self.logger.info("Successfully created configuration file {}.".format(self.config_path)) + self.logger.info( + "Successfully created configuration file {}.".format(self.config_path) + ) return self.config def _set_archive_config(self): """ Set Archive name and type """ - archive_name = input("Full archive name (eg: 'TER/MA', 'West Wing Fanfiction Archive') - this will be used to " - "generate dummy emails\n>> ") - self.config['Archive'] = { - 'archive_name': archive_name, - 'archive_type': 'EF', - 'code_name': self.code_name + archive_name = input( + "Full archive name (eg: 'TER/MA', 'West Wing Fanfiction Archive') - this will be used to " + "generate dummy emails\n>> " + ) + self.config["Archive"] = { + "archive_name": archive_name, + "archive_type": "EF", + "code_name": self.code_name, } def _set_processing_config(self): """ Set Processing settings """ - self.config['Processing'] = { - 'code_name': self.code_name, - 'working_dir': self.working_dir, - 'next_step': "01", - 'done_steps': "" + self.config["Processing"] = { + "code_name": self.code_name, + "working_dir": self.working_dir, + "next_step": "01", + "done_steps": "", } def _create_or_get_archive_config(self): @@ -74,15 +79,15 @@ def processing(self, key): :param key: Configuration key to look up. :return: The value of the specified key. """ - return self.config['Processing'][key] + return self.config["Processing"][key] def archive(self, key): """ - Return value of the specified key from the Archive section in the ini file. - :param key: Configuration key to look up. - :return: The value of the specified key. - """ - return self.config['Archive'][key] + Return value of the specified key from the Archive section in the ini file. + :param key: Configuration key to look up. + :return: The value of the specified key. 
+ """ + return self.config["Archive"][key] def save(self): """ @@ -90,11 +95,13 @@ def save(self): :return: 0 if there is no config to save """ if len(self.config.keys()) != 0: - backup_path = os.path.join(self.processing('working_dir'), os.path.basename(self.config_path)) - with open(backup_path, 'w') as backup_config: + backup_path = os.path.join( + self.processing("working_dir"), os.path.basename(self.config_path) + ) + with open(backup_path, "w") as backup_config: self.config.write(backup_config) - with open(self.config_path, 'w') as configfile: + with open(self.config_path, "w") as configfile: self.config.write(configfile) else: return None diff --git a/opendoors/logging.py b/opendoors/logging.py index 716e4d2..cc0b917 100644 --- a/opendoors/logging.py +++ b/opendoors/logging.py @@ -12,6 +12,7 @@ class Logging: """ Utility class for logging """ + def __init__(self, working_dir, code_name): self.code_name = code_name self.working_dir = working_dir @@ -25,14 +26,16 @@ def logger(self): log = logging.getLogger() log.setLevel(logging.INFO) - color_formatter = ColoredFormatter('%(log_color)s%(message)s%(reset)s') + color_formatter = ColoredFormatter("%(log_color)s%(message)s%(reset)s") stream = logging.StreamHandler(sys.stdout) stream.setLevel(logging.INFO) stream.setFormatter(color_formatter) log.addHandler(stream) - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - fh = logging.FileHandler(os.path.join(self.working_dir, "{}.log".format(self.code_name))) + formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s") + fh = logging.FileHandler( + os.path.join(self.working_dir, "{}.log".format(self.code_name)) + ) fh.setFormatter(formatter) log.addHandler(fh) return log diff --git a/opendoors/mysql.py b/opendoors/mysql.py index 04964f1..c5ce604 100644 --- a/opendoors/mysql.py +++ b/opendoors/mysql.py @@ -15,35 +15,36 @@ class SqlDb: Wrapper and helper methods for MySQL commands """ - def __init__(self, config: ConfigParser, logger: Logger, suppress_log: bool = False): + def __init__( + self, config: ConfigParser, logger: Logger, suppress_log: bool = False + ): self.config = config self.logger = logger self.conn = pymysql.connect( - **self.get_db_config(), - cursorclass=DictCursor, - local_infile=True) + **self.get_db_config(), cursorclass=DictCursor, local_infile=True + ) if not suppress_log: - self.logger.info(f"Connected to MySQL database server at {self.config['Database']['host']} " - f"as {self.config['Database']['user']}") + self.logger.info( + f"Connected to MySQL database server at {self.config['Database']['host']} " + f"as {self.config['Database']['user']}" + ) def get_db_config(self): """ Get or prompt user for MySQL connection config :return: MySQL connection config """ - if not (self.config.has_section('Database') - and self.config['Database']['user'] - and self.config['Database']['host'] - and self.config.has_option('Database', 'password')): + if not ( + self.config.has_section("Database") + and self.config["Database"]["user"] + and self.config["Database"]["host"] + and self.config.has_option("Database", "password") + ): host = input("MySQL host name (eg: localhost):\n>> ") user = input("MySQL user name (eg: root):\n>> ") password = input("MySQL password:\n>> ") - self.config['Database'] = { - 'host': host, - 'user': user, - 'password': password - } - return self.config['Database'] + self.config["Database"] = {"host": host, "user": user, "password": password} + return self.config["Database"] def load_sql_file_into_db(self, sql_path: str): """ @@ 
-103,7 +104,7 @@ def execute_and_fetchall(self, database: str, statement: str): self.conn.commit() return cursor.fetchall() - def execute(self, database: str, statement: str, params: Tuple=None): + def execute(self, database: str, statement: str, params: Tuple = None): """ Execute a statement without fetching the results. :param database: The database to run the statement against. @@ -136,17 +137,19 @@ def dump_database(self, database: str, destination_filepath: str): cursor.execute("SHOW TABLES") tables = [] for table in cursor.fetchall(): - tables.append(table[f'Tables_in_{database}']) + tables.append(table[f"Tables_in_{database}"]) for table in tables: f.writelines(f"\nDROP TABLE IF EXISTS {database}.`{str(table)}`;\n") cursor.execute(f"SHOW CREATE TABLE {database}.`{str(table)}`;") - f.writelines([str(cursor.fetchone()['Create Table']), ";\n"]) + f.writelines([str(cursor.fetchone()["Create Table"]), ";\n"]) cursor.execute(f"SHOW COLUMNS FROM {str(table)};") column_definitions = cursor.fetchall() - column_names = ", ".join([f"`{definition['Field']}`" for definition in column_definitions]) + column_names = ", ".join( + [f"`{definition['Field']}`" for definition in column_definitions] + ) cursor.execute(f"SELECT * FROM {database}.`{str(table)}`;") counter = 0 @@ -156,7 +159,9 @@ def dump_database(self, database: str, destination_filepath: str): if row_group: f.write(",\n".join(row_group) + ";\n") row_group = [] - f.write(f"INSERT INTO {database}.`{str(table)}` ({column_names}) VALUES \n") + f.write( + f"INSERT INTO {database}.`{str(table)}` ({column_names}) VALUES \n" + ) field_arr = [] for field in row: if type(row[field]) == str or type(row[field]) == datetime.datetime: # noqa: E721 @@ -191,7 +196,6 @@ def ensure_local_infile(self): cursor.execute("SET GLOBAL local_infile=1") cursor.close() - def __del__(self): """ Destructor to disconnect from the database @@ -200,5 +204,3 @@ def __del__(self): self.conn.close() except: # noqa: E722 pass - - diff --git a/opendoors/progress.py b/opendoors/progress.py index 15daa08..68bf6b2 100644 --- a/opendoors/progress.py +++ b/opendoors/progress.py @@ -14,24 +14,28 @@ def continue_from_last(config: ConfigParser, logger: Logger, sql: SqlDb, steps: :param steps: A dict containing all the steps in this process as StepInfo items. """ run_next = True - next_step = config['Processing']['next_step'] if config['Processing']['next_step'] else "01" + next_step = ( + config["Processing"]["next_step"] if config["Processing"]["next_step"] else "01" + ) try: while run_next: if next_step != "None": # Prompt for the step to run based on the configured next step step_to_run, done_steps = get_next_step(config, next_step) step_config = steps[step_to_run] - step = step_config['class'](config, logger, sql, step_config['info']) + step = step_config["class"](config, logger, sql, step_config["info"]) run_next = step.run() # Update the list of completed steps in the config if run_next: - next_step = config['Processing']['next_step'] = step.next_step + next_step = config["Processing"]["next_step"] = step.next_step update_done_steps(config, done_steps, step_to_run) else: - restart_yn = input("All steps have been completed for this archive. Do you want to\n" - "1. Restart from step 1\n" - "2. Exit (default - press Enter)\n>> ") + restart_yn = input( + "All steps have been completed for this archive. Do you want to\n" + "1. Restart from step 1\n" + "2. 
Exit (default - press Enter)\n>> " + ) if restart_yn == "1": next_step = "01" else: @@ -53,7 +57,7 @@ def update_done_steps(config: ConfigParser, done_steps: list, step_to_run: str) done_steps = {step_to_run.strip()} else: done_steps.append(step_to_run.strip()) - steps = config['Processing']['done_steps'] = ', '.join(done_steps) + steps = config["Processing"]["done_steps"] = ", ".join(done_steps) return steps @@ -64,25 +68,40 @@ def get_next_step(config: ConfigParser, step_to_run: str) -> (str, list): :param step_to_run: The next step in the process. :return: """ + def __is_valid(s): - return s not in [',', '', ' '] + return s not in [",", "", " "] def __tidy_steps(): - return sorted(set(list(map(lambda x: x.strip(), filter(__is_valid, config['Processing']['done_steps'].split(",")))))) + return sorted( + set( + list( + map( + lambda x: x.strip(), + filter( + __is_valid, config["Processing"]["done_steps"].split(",") + ), + ) + ) + ) + ) completed_steps = [] if step_to_run == "01" else __tidy_steps() if step_to_run != "01" and completed_steps: if len(completed_steps) > 1: - steps_list = "Steps {} and {} have".format(", ".join(completed_steps[:-1]), completed_steps[-1]) + steps_list = "Steps {} and {} have".format( + ", ".join(completed_steps[:-1]), completed_steps[-1] + ) elif len(completed_steps) == 1: steps_list = "Step {} has".format(", ".join(completed_steps)) else: steps_list = "No steps have" - resume_yn = \ - input(f"{steps_list} been completed. Please choose one of the following options:\n" - f"1. Restart entire process from step 01 (this will remove any working files already created)\n" - f"2. Continue processing from step {step_to_run} (default - press enter)\n>> ") - restart = resume_yn.lower() == '1' + resume_yn = input( + f"{steps_list} been completed. Please choose one of the following options:\n" + f"1. Restart entire process from step 01 (this will remove any working files already created)\n" + f"2. Continue processing from step {step_to_run} (default - press enter)\n>> " + ) + restart = resume_yn.lower() == "1" else: restart = False if restart: diff --git a/opendoors/sql_utils.py b/opendoors/sql_utils.py index e9a4097..e632852 100644 --- a/opendoors/sql_utils.py +++ b/opendoors/sql_utils.py @@ -12,10 +12,14 @@ def extract_table_name(sql: str): :param sql: str. 
a single SQL statement :return: """ - prefixes = ['drop table if exists ', 'create table ', 'insert into '] - end = re.sub(r'|'.join(map(re.escape, prefixes)), '', sql.lower().strip()) - table_name_match = re.match(r'`?(\S*?_?\S*?)`?[\s;]', end) - table_name = str.strip(table_name_match[1]).replace('`', '') if table_name_match is not None else '' + prefixes = ["drop table if exists ", "create table ", "insert into "] + end = re.sub(r"|".join(map(re.escape, prefixes)), "", sql.lower().strip()) + table_name_match = re.match(r"`?(\S*?_?\S*?)`?[\s;]", end) + table_name = ( + str.strip(table_name_match[1]).replace("`", "") + if table_name_match is not None + else "" + ) return table_name @@ -25,7 +29,9 @@ def group_by_table(statements: list): :param statements: list of SQL statements :return: dict of statements grouped by table name """ - group_list = [(gr, list(items)) for gr, items in groupby(statements, key=extract_table_name)] + group_list = [ + (gr, list(items)) for gr, items in groupby(statements, key=extract_table_name) + ] groups = defaultdict(list) for gr, stmt_list in group_list: groups[gr].extend(stmt_list) @@ -39,9 +45,11 @@ def add_create_database(database_name: str, statements: list): :param statements: existing SQL statements to apply to that database :return: amended list of statements """ - return [f"DROP DATABASE IF EXISTS `{database_name}`;", - f"CREATE DATABASE `{database_name}`;", - f"USE `{database_name}`;\n"] + statements + return [ + f"DROP DATABASE IF EXISTS `{database_name}`;", + f"CREATE DATABASE `{database_name}`;", + f"USE `{database_name}`;\n", + ] + statements def write_statements_to_file(filepath: str, statements: list) -> str: @@ -54,10 +62,10 @@ def write_statements_to_file(filepath: str, statements: list) -> str: if not os.path.exists(os.path.dirname(filepath)): os.makedirs(os.path.dirname(filepath)) - with open(filepath, 'w', encoding="utf-8") as file: + with open(filepath, "w", encoding="utf-8") as file: for statement in statements: - if statement.startswith('DROP TABLE'): - file.write('\n') + if statement.startswith("DROP TABLE"): + file.write("\n") file.write(statement + "\n") return filepath @@ -70,7 +78,11 @@ def parse_remove_comments(original_db_sql: str): """ stmts = sqlparse.format(original_db_sql, None, strip_comments=True) raw_statements = sqlparse.split(stmts) - cleaned_statements = [clean_up_sql_statement(item) for item in raw_statements if item not in [';', '#']] + cleaned_statements = [ + clean_up_sql_statement(item) + for item in raw_statements + if item not in [";", "#"] + ] return [statement for statement in cleaned_statements if statement.strip()] @@ -80,8 +92,12 @@ def remove_invalid_date_default(statement: str) -> str: :param statement: SQL statement to modify :return: modified statement """ - return re.sub(r"datetime not null default '0000-00-00 00:00:00'", "datetime DEFAULT NULL", statement, - flags=re.IGNORECASE) + return re.sub( + r"datetime not null default '0000-00-00 00:00:00'", + "datetime DEFAULT NULL", + statement, + flags=re.IGNORECASE, + ) def remove_unwanted_statements(statement: str) -> str: @@ -91,8 +107,8 @@ def remove_unwanted_statements(statement: str) -> str: :param statement: SQL statement to modify :return: an empty string if this was a CREATE or USE DATABASE statement or the original statement if not """ - if statement.lower().startswith(('create database ', 'use ', 'lock ', 'unlock ')): - return '' + if statement.lower().startswith(("create database ", "use ", "lock ", "unlock ")): + return "" else: return statement 
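
For context, the sql_utils hunks above only change quoting and line wrapping, so the helpers behave exactly as before. A minimal sketch of how they compose, assuming the opendoors package is importable; the stories statements and the od_example database name are illustrative only, not taken from this patch:

    from opendoors import sql_utils

    statements = [
        "CREATE TABLE `stories` (id int);",
        "INSERT INTO stories VALUES (1, 'title');",
    ]

    # Both statements resolve to the same table name: extract_table_name
    # lowercases the statement, strips the DDL/DML prefix and any backticks.
    assert sql_utils.extract_table_name(statements[0]) == "stories"
    assert sql_utils.extract_table_name(statements[1]) == "stories"

    # group_by_table buckets the statements by the table they touch...
    grouped = sql_utils.group_by_table(statements)
    assert list(grouped.keys()) == ["stories"]

    # ...and add_create_database prepends DROP/CREATE/USE statements so a
    # dump built from them can be replayed into an empty MySQL server.
    bootstrap = sql_utils.add_create_database("od_example", statements)
    assert bootstrap[0] == "DROP DATABASE IF EXISTS `od_example`;"
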
diff --git a/opendoors/step_base.py b/opendoors/step_base.py index 647cfd5..5f91d39 100644 --- a/opendoors/step_base.py +++ b/opendoors/step_base.py @@ -10,10 +10,11 @@ @dataclass -class StepInfo(): +class StepInfo: """ Data class for steps. """ + step_number: str step_description: str next_step: str @@ -24,22 +25,31 @@ class StepBase: Base for processing steps. """ - def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb, step_info: StepInfo): + def __init__( + self, config: ConfigParser, logger: Logger, sql: SqlDb, step_info: StepInfo + ): self.next_step = step_info.next_step self.sql = sql self.logger = logger self.config = config - self.code_name = config['Archive']['code_name'] + self.code_name = config["Archive"]["code_name"] self.step = step_info.step_number self.step_path = self.create_working_sub_dir() - banner = make_banner('-', f' Running Step {step_info.step_number}: {step_info.step_description} ') + banner = make_banner( + "-", + f" Running Step {step_info.step_number}: {step_info.step_description} ", + ) self.logger.info(banner) def create_working_sub_dir(self): - self.step_path = get_prefixed_path(self.step, self.config['Processing']['working_dir']) + self.step_path = get_prefixed_path( + self.step, self.config["Processing"]["working_dir"] + ) if os.path.exists(self.step_path): shutil.rmtree(self.step_path) - self.logger.info(f"Deleted existing {self.step} folder to start from scratch") + self.logger.info( + f"Deleted existing {self.step} folder to start from scratch" + ) os.makedirs(self.step_path) return self.step_path @@ -59,6 +69,8 @@ def finish(self): if self.next_step is None: self.logger.info("All steps completed.") else: - self.logger.info(f"\nStep {self.step} completed, ready for step {self.next_step}\n") - self.config['Processing']['next_step'] = self.next_step + self.logger.info( + f"\nStep {self.step} completed, ready for step {self.next_step}\n" + ) + self.config["Processing"]["next_step"] = self.next_step return True diff --git a/opendoors/tests/test_config.py b/opendoors/tests/test_config.py index 8756be4..b4177b7 100644 --- a/opendoors/tests/test_config.py +++ b/opendoors/tests/test_config.py @@ -6,11 +6,13 @@ class ConfigTest(TestCase): - @patch('builtins.input', lambda *args: 'test') - @patch('builtins.open') - @patch('opendoors.config.configparser.ConfigParser.write') + @patch("builtins.input", lambda *args: "test") + @patch("builtins.open") + @patch("opendoors.config.configparser.ConfigParser.write") def test_save(self, _mock_write, _mock_open): config = ArchiveConfig(Logger("test"), "test", "working_dir") - self.assertEqual("test", - config.config["Archive"]["code_name"], - "code_name config should be set to the provided short code for the archive") + self.assertEqual( + "test", + config.config["Archive"]["code_name"], + "code_name config should be set to the provided short code for the archive", + ) diff --git a/opendoors/tests/test_logging.py b/opendoors/tests/test_logging.py index 07a3ccc..032c8d7 100644 --- a/opendoors/tests/test_logging.py +++ b/opendoors/tests/test_logging.py @@ -11,14 +11,20 @@ def test_logger(self): self.assertEqual(20, logger.level, "log level should be INFO") self.assertEqual(6, len(handlers), "there should be 6 handlers in the Logger") - file_handler = [h for h in handlers if h.__class__.__name__ == 'FileHandler'][0] - formatter1 = file_handler.__dict__['formatter'].__dict__['_fmt'] - self.assertEqual("%(asctime)s %(levelname)s %(message)s", - formatter1, - "file formatter should include date, level and message 
only") + file_handler = [h for h in handlers if h.__class__.__name__ == "FileHandler"][0] + formatter1 = file_handler.__dict__["formatter"].__dict__["_fmt"] + self.assertEqual( + "%(asctime)s %(levelname)s %(message)s", + formatter1, + "file formatter should include date, level and message only", + ) - stream_handler = [h for h in handlers if h.__class__.__name__ == 'StreamHandler'][0] - formatter2 = stream_handler.__dict__['formatter'].__dict__['_fmt'] - self.assertEqual("%(log_color)s%(message)s%(reset)s", - formatter2, - "stream handler should use color formatting") + stream_handler = [ + h for h in handlers if h.__class__.__name__ == "StreamHandler" + ][0] + formatter2 = stream_handler.__dict__["formatter"].__dict__["_fmt"] + self.assertEqual( + "%(log_color)s%(message)s%(reset)s", + formatter2, + "stream handler should use color formatting", + ) diff --git a/opendoors/tests/test_progress.py b/opendoors/tests/test_progress.py index 2e52c2d..eb15743 100644 --- a/opendoors/tests/test_progress.py +++ b/opendoors/tests/test_progress.py @@ -24,8 +24,14 @@ def run(self): steps = { - '01': {'info': StepInfo(next_step='02', step_description='Step 1', step_number='01'), 'class': Step1}, - '02': {'info': StepInfo(next_step='None', step_description='Step 2', step_number='02'), 'class': Step2} + "01": { + "info": StepInfo(next_step="02", step_description="Step 1", step_number="01"), + "class": Step1, + }, + "02": { + "info": StepInfo(next_step="None", step_description="Step 2", step_number="02"), + "class": Step2, + }, } test_logger = MagicMock() @@ -34,43 +40,63 @@ def run(self): class TestProgress(TestCase): def tearDown(self) -> None: - """ Remove any files generated in test_output """ - remove_output_files('opendoors/tests/test_output/*') + """Remove any files generated in test_output""" + remove_output_files("opendoors/tests/test_output/*") # This patch responds '2' to every prompt and makes it run all the steps in turn - @patch('builtins.input', lambda *args: '2') + @patch("builtins.input", lambda *args: "2") def test_continue_from_last(self): - test_config = ArchiveConfig(test_logger, "test1", "opendoors/tests/test_output").config + test_config = ArchiveConfig( + test_logger, "test1", "opendoors/tests/test_output" + ).config continue_from_last(test_config, test_logger, test_sql, steps) - self.assertSetEqual(set("01, 02".split(', ')), set(test_config['Processing']['done_steps'].split(', ')), - "continue_from_last should update the done_steps config") + self.assertSetEqual( + set("01, 02".split(", ")), + set(test_config["Processing"]["done_steps"].split(", ")), + "continue_from_last should update the done_steps config", + ) - @patch('builtins.input', lambda *args: 'Full Archive Name') + @patch("builtins.input", lambda *args: "Full Archive Name") def test_update_done_steps(self): - test_config = ArchiveConfig(test_logger, "test2", "opendoors/tests/test_output").config + test_config = ArchiveConfig( + test_logger, "test2", "opendoors/tests/test_output" + ).config test_config["Processing"]["done_steps"] = "01, 02" done_steps = update_done_steps(test_config, ["01", "02"], "03") - self.assertEqual("01, 02, 03", done_steps, "update_done_steps should return the done steps as a string") + self.assertEqual( + "01, 02, 03", + done_steps, + "update_done_steps should return the done steps as a string", + ) - @patch('builtins.input', lambda *args: '2') + @patch("builtins.input", lambda *args: "2") def test_get_next_step_after_01(self): - test_config = ArchiveConfig(test_logger, "test3", 
"opendoors/tests/test_output").config + test_config = ArchiveConfig( + test_logger, "test3", "opendoors/tests/test_output" + ).config next_step, done_steps = get_next_step(test_config, "01") self.assertEqual("01", next_step, "next_step should be set to 01") self.assertEqual([], done_steps, "done_steps should be reset to empty") - @patch('builtins.input', lambda *args: '2') + @patch("builtins.input", lambda *args: "2") def test_get_next_step_after_02(self): - test_config = ArchiveConfig(test_logger, "test4", "opendoors/tests/test_output").config + test_config = ArchiveConfig( + test_logger, "test4", "opendoors/tests/test_output" + ).config test_config["Processing"]["done_steps"] = "01, 02" next_step, done_steps = get_next_step(test_config, "02") self.assertEqual("02", next_step, "next_step should be set to 02") - self.assertListEqual(test_config["Processing"]["done_steps"].split(", "), done_steps, - "done_steps should be the value currently in the config") + self.assertListEqual( + test_config["Processing"]["done_steps"].split(", "), + done_steps, + "done_steps should be the value currently in the config", + ) - @patch('builtins.input', lambda *args: '1') + @patch("builtins.input", lambda *args: "1") def test_get_next_step_with_restart(self): - test_config = ArchiveConfig(test_logger, "test5", "opendoors/tests/test_output").config + test_config = ArchiveConfig( + test_logger, "test5", "opendoors/tests/test_output" + ).config test_config["Processing"]["done_steps"] = "01, 02" next_step, done_steps = get_next_step(test_config, "02") self.assertEqual("01", next_step, "next_step should be set to 01") diff --git a/opendoors/tests/test_sql_db.py b/opendoors/tests/test_sql_db.py index eea11c8..2a1d2de 100644 --- a/opendoors/tests/test_sql_db.py +++ b/opendoors/tests/test_sql_db.py @@ -12,20 +12,32 @@ class TestSqlDb(TestCase): test_logger = Logger("test") - test_config: ConfigParser = ArchiveConfig(test_logger, "test", "opendoors/tests/test_data").config + test_config: ConfigParser = ArchiveConfig( + test_logger, "test", "opendoors/tests/test_data" + ).config test_sql = SqlDb(test_config, test_logger) def tearDown(self) -> None: - """ Remove any files generated in test_output """ - filtered = [f for f in glob.glob('opendoors/tests/test_output/*') if not re.match(r'\.keep', f)] + """Remove any files generated in test_output""" + filtered = [ + f + for f in glob.glob("opendoors/tests/test_output/*") + if not re.match(r"\.keep", f) + ] for file in filtered: os.remove(file) def test_dump_database(self): - self.test_sql.load_sql_file_into_db(get_full_path("opendoors/tests/test_data/test.sql")) - self.test_sql.dump_database("od_test_sql", get_full_path("opendoors/tests/test_output/test_output.sql")) + self.test_sql.load_sql_file_into_db( + get_full_path("opendoors/tests/test_data/test.sql") + ) + self.test_sql.dump_database( + "od_test_sql", get_full_path("opendoors/tests/test_output/test_output.sql") + ) with open(get_full_path("opendoors/tests/test_output/test_output.sql")) as f: result = f.readlines() - self.assertEqual("(1,'Name1\\'s \\\"stringé\\\"',NULL),\n", - result[14], - "all the SQL statements should be present") + self.assertEqual( + "(1,'Name1\\'s \\\"stringé\\\"',NULL),\n", + result[14], + "all the SQL statements should be present", + ) diff --git a/opendoors/tests/test_sql_utils.py b/opendoors/tests/test_sql_utils.py index 01ee5a1..54b2bc7 100644 --- a/opendoors/tests/test_sql_utils.py +++ b/opendoors/tests/test_sql_utils.py @@ -15,27 +15,41 @@ class TestSqlUtils(TestCase): ) 
ENGINE=MyISAM AUTO_INCREMENT=311 DEFAULT CHARSET=latin1;""", """insert into thingy_stories VALUES ("1", "thing");""", """insert into thingy_stories VALUES ("2", "thing");""", - """insert into thingy_polls VALUES ("3", "thing");""" + """insert into thingy_polls VALUES ("3", "thing");""", ] def test_extract_table_name_lower_with_underscore_and_backtick(self): result = sql_utils.extract_table_name(self.statements[0]) - self.assertEqual(result, 'thingy_stories', "the correct table name should be extracted") + self.assertEqual( + result, "thingy_stories", "the correct table name should be extracted" + ) def test_extract_table_name_upper_no_underscore_with_backtick(self): result = sql_utils.extract_table_name(self.statements[1]) - self.assertEqual(result, 'fanfictionstories', - "the original table name should be returned if it is surrounded by backticks") + self.assertEqual( + result, + "fanfictionstories", + "the original table name should be returned if it is surrounded by backticks", + ) def test_extract_table_name_upper_with_underscore_no_backtick(self): result = sql_utils.extract_table_name(self.statements[2]) - self.assertEqual(result, 'fanfiction_stories', - "the original table name should be returned if it is NOT surrounded by backticks") + self.assertEqual( + result, + "fanfiction_stories", + "the original table name should be returned if it is NOT surrounded by backticks", + ) def test_group_by_table(self): result = sql_utils.group_by_table(self.statements) self.assertIsInstance(result, dict, "group_by_table should return a dict") - self.assertIsInstance(result['fanfiction_stories'], list, - "the value of each key should be a list of statements") - self.assertEqual(3, len(result['thingy_stories']), - "the value of a key should contain the expected number of statements") + self.assertIsInstance( + result["fanfiction_stories"], + list, + "the value of each key should be a list of statements", + ) + self.assertEqual( + 3, + len(result["thingy_stories"]), + "the value of a key should contain the expected number of statements", + ) diff --git a/opendoors/tests/test_utils.py b/opendoors/tests/test_utils.py index bea8e31..7fa5691 100644 --- a/opendoors/tests/test_utils.py +++ b/opendoors/tests/test_utils.py @@ -7,44 +7,52 @@ class UtilsTest(TestCase): def test_make_banner_with_padding(self): - banner1 = make_banner('-', "TEXT", 3) - self.assertEqual("\n----------\n TEXT \n----------", banner1, - "text should be padded by the specified number of spaces") + banner1 = make_banner("-", "TEXT", 3) + self.assertEqual( + "\n----------\n TEXT \n----------", + banner1, + "text should be padded by the specified number of spaces", + ) def test_make_banner_with_default_padding(self): - banner2 = make_banner('-', "TEXT") - self.assertEqual("\n--------\n TEXT \n--------", banner2, - "text should be padded by two spaces when no padding is specified") - - @patch('opendoors.utils.os.makedirs') + banner2 = make_banner("-", "TEXT") + self.assertEqual( + "\n--------\n TEXT \n--------", + banner2, + "text should be padded by two spaces when no padding is specified", + ) + + @patch("opendoors.utils.os.makedirs") def test_set_working_dir_with_path(self, mock_makedirs): working_dir1 = set_working_dir("new_path", "archive_code") mock_makedirs.assert_called_with("new_path") - self.assertEqual("new_path", - working_dir1, - "`path` parameter should be used if supplied") + self.assertEqual( + "new_path", working_dir1, "`path` parameter should be used if supplied" + ) - @patch('builtins.input', lambda *args: '') + 
@patch("builtins.input", lambda *args: "") def test_set_working_dir_without_path(self): working_dir_2 = set_working_dir(None, "archive_code") - self.assertEqual(str(Path().home() / "otw_opendoors" / "archive_code"), - working_dir_2, - "a path based on the user folder should be used if no path is supplied") + self.assertEqual( + str(Path().home() / "otw_opendoors" / "archive_code"), + working_dir_2, + "a path based on the user folder should be used if no path is supplied", + ) def test_prefixed_path_no_filename(self): base_path = str(Path().home() / "otw_opendoors") test_path = get_prefixed_path("01", base_path) - full_path = str(Path().home() / "otw_opendoors" / "efiction-01") - self.assertEqual(full_path, - test_path, - "step folder should be created in lieu of filename") + full_path = str(Path().home() / "otw_opendoors" / "efiction-01") + self.assertEqual( + full_path, test_path, "step folder should be created in lieu of filename" + ) def test_prefixed_path_with_filename(self): prefix = "efiction-01" base_path = str(Path().home() / "otw_opendoors" / prefix) file_name = "test_working_open_doors.sql" test_path = get_prefixed_path("01", base_path, file_name) - full_path = str(Path().home() / "otw_opendoors" / prefix / f"{prefix}-{file_name}") - self.assertEqual(full_path, - test_path, - "filename should be prefixed with step") \ No newline at end of file + full_path = str( + Path().home() / "otw_opendoors" / prefix / f"{prefix}-{file_name}" + ) + self.assertEqual(full_path, test_path, "filename should be prefixed with step") diff --git a/opendoors/thread_pool.py b/opendoors/thread_pool.py index 41f3634..962d50c 100644 --- a/opendoors/thread_pool.py +++ b/opendoors/thread_pool.py @@ -1,5 +1,6 @@ from threading import Thread + class ThreadedPool: def __init__(self, number_of_threads: int): """ @@ -10,7 +11,7 @@ def __init__(self, number_of_threads: int): def map(self, func, work): """ - Maps threads to work to be done, does not return values returned by + Maps threads to work to be done, does not return values returned by function :param func: a callable that will be called with *args from work :param work: List of lists with args @@ -31,4 +32,3 @@ def map(self, func, work): for job in jobs: job.join() print() - diff --git a/opendoors/utils.py b/opendoors/utils.py index a950f1c..ac198b0 100644 --- a/opendoors/utils.py +++ b/opendoors/utils.py @@ -11,6 +11,7 @@ import unicodedata + def get_full_path(path): """ Return the absolute path based on the supplied fragment @@ -43,7 +44,10 @@ def check_if_file_exists(config, section, option): """ return config.has_option(section, option) and Path(config[section][option]).exists -def key_find(needle: object, haystack: Mapping[object, object], none_val: object = None) -> object: + +def key_find( + needle: object, haystack: Mapping[object, object], none_val: object = None +) -> object: """ Helper function to search a Dict (or any Mappable type) for a key, returning the value if it exists. 
This avoids KeyErrors when it is not certain if the key @@ -59,6 +63,7 @@ def key_find(needle: object, haystack: Mapping[object, object], none_val: object return haystack[needle] return none_val + def make_banner(border_char: chr, banner_text: str, padding=2): """ Make a banner with the provided text bordered by the provided character @@ -69,9 +74,11 @@ def make_banner(border_char: chr, banner_text: str, padding=2): """ width = len(banner_text) + (padding * 2) banner_border = border_char.ljust(width, border_char) - return f"\n{banner_border}\n" \ - f"{' ' * padding}{banner_text}{' ' * padding}\n" \ - f"{banner_border}" + return ( + f"\n{banner_border}\n" + f"{' ' * padding}{banner_text}{' ' * padding}\n" + f"{banner_border}" + ) def set_working_dir(path=None, code_name=""): @@ -84,8 +91,10 @@ def set_working_dir(path=None, code_name=""): """ if path is None: _working_dir = os.path.join(os.path.expanduser("~"), "otw_opendoors", code_name) - prompt = input("Path to working directory to use for this archive " - "(press Enter for default: {}):\n>> ".format(_working_dir)) + prompt = input( + "Path to working directory to use for this archive " + "(press Enter for default: {}):\n>> ".format(_working_dir) + ) if prompt != "": _working_dir = prompt else: @@ -116,7 +125,8 @@ def print_progress(current, total, text="stories"): """ current += 1 import sys - sys.stdout.write(f'\r{current}/{total} {text}') + + sys.stdout.write(f"\r{current}/{total} {text}") if current >= total: sys.stdout.write("\n") sys.stdout.flush() @@ -128,7 +138,7 @@ def remove_output_files(path: str): Remove all files and folders in a path - mainly useful for test cleanup :param path: the path to tidy up, relative to the root of the project """ - filtered = [f for f in glob.glob(path) if not re.match(r'\.keep', f)] + filtered = [f for f in glob.glob(path) if not re.match(r"\.keep", f)] for file in filtered: try: if Path(file).is_dir(): @@ -146,9 +156,10 @@ def normalize(text): :param text: the text to normalize :return: normalized, unescaped text """ - return unicodedata.normalize("NFKD", html.unescape(text) or '').strip() + return unicodedata.normalize("NFKD", html.unescape(text) or "").strip() + -def get_prefixed_path(step: str, path: str, filename: str=""): +def get_prefixed_path(step: str, path: str, filename: str = ""): """ Adds the efiction step prefix to filenames or folders :param step: The current step diff --git a/start.py b/start.py index d1111a0..8b9c8c4 100755 --- a/start.py +++ b/start.py @@ -13,15 +13,36 @@ from steps import step_01, step_02, step_03, step_04 steps = { - '01': {'info': StepInfo(next_step='02', step_description='Load original database', step_number='01'), - 'class': step_01.Step01}, - '02': {'info': StepInfo(next_step='03', step_description='Create simplified database', step_number='02'), - 'class': step_02.Step02}, - '03': {'info': StepInfo(next_step='04', step_description='Convert metadata to Open Doors tables', step_number='03'), - 'class': step_03.Step03}, - '04': { - 'info': StepInfo(next_step='None', step_description='Load chapters into Open Doors tables', step_number='04'), - 'class': step_04.Step04} + "01": { + "info": StepInfo( + next_step="02", step_description="Load original database", step_number="01" + ), + "class": step_01.Step01, + }, + "02": { + "info": StepInfo( + next_step="03", + step_description="Create simplified database", + step_number="02", + ), + "class": step_02.Step02, + }, + "03": { + "info": StepInfo( + next_step="04", + step_description="Convert metadata to Open Doors 
tables", + step_number="03", + ), + "class": step_03.Step03, + }, + "04": { + "info": StepInfo( + next_step="None", + step_description="Load chapters into Open Doors tables", + step_number="04", + ), + "class": step_04.Step04, + }, } config = configparser.ConfigParser() @@ -36,7 +57,7 @@ def save_config_and_exit(): config.save() -if __name__ == '__main__': +if __name__ == "__main__": """ Syntax: python3 start.py [CODENAME] [ROOT_PATH_TO_USE] Example: python3 start.py mvw /users/me/otw_opendoors @@ -46,13 +67,16 @@ def save_config_and_exit(): code_name = sys.argv[1] else: code_name = None - while (code_name == None or any([x not in "qwertyuiopasdfghjklzxcvbnm" for x in code_name])): # noqa: E711 + while code_name == None or any( # noqa: E711 + [x not in "qwertyuiopasdfghjklzxcvbnm" for x in code_name] + ): code_name = input( "Please provide a short, lowercase code name with no spaces or punctuation for the archive " - "you are processing (and make a note of it as you'll need it in future!):\n>> ") + "you are processing (and make a note of it as you'll need it in future!):\n>> " + ) banner_text = f"""Starting processing for archive "{code_name}"...""" - banner = make_banner('=', banner_text) + banner = make_banner("=", banner_text) path = sys.argv[2] if len(sys.argv) > 2 else None working_dir = set_working_dir(path, code_name) diff --git a/steps/tests/test_step_01.py b/steps/tests/test_step_01.py index 40ea1fb..5633f81 100644 --- a/steps/tests/test_step_01.py +++ b/steps/tests/test_step_01.py @@ -17,8 +17,12 @@ class TestStep01(TestCase): def tearDown(self) -> None: - """ Remove any files generated in test_output """ - filtered = [f for f in glob.glob('steps/tests/test_output/*') if not re.match(r'\.keep', f)] + """Remove any files generated in test_output""" + filtered = [ + f + for f in glob.glob("steps/tests/test_output/*") + if not re.match(r"\.keep", f) + ] for file in filtered: try: if Path(file).is_dir(): @@ -29,7 +33,7 @@ def tearDown(self) -> None: # We don't necessarily care that much continue - @patch('builtins.input', lambda *args: 'efiction/tests/test_data/efiction.sql') + @patch("builtins.input", lambda *args: "efiction/tests/test_data/efiction.sql") def test_run(self): step_info = StepInfo("test_output/01", "Test step 01", "02") thing = Step01(test_config, test_logger, test_sql, step_info).run() diff --git a/steps/tests/test_step_02.py b/steps/tests/test_step_02.py index b51064a..ac53d3d 100644 --- a/steps/tests/test_step_02.py +++ b/steps/tests/test_step_02.py @@ -17,8 +17,12 @@ class TestStep02(TestCase): def tearDown(self) -> None: - """ Remove any files generated in test_output """ - filtered = [f for f in glob.glob('steps/tests/test_output/*') if not re.match(r'\.keep', f)] + """Remove any files generated in test_output""" + filtered = [ + f + for f in glob.glob("steps/tests/test_output/*") + if not re.match(r"\.keep", f) + ] for file in filtered: try: if Path(file).is_dir():