OD-1730: Switch to Ruff #38

Merged: 5 commits, Nov 11, 2023
12 changes: 6 additions & 6 deletions .github/workflows/python-app-linux.yml
@@ -34,14 +34,14 @@ jobs:
shell: bash
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
ruff check .
- name: Check formatting with ruff
run: |
ruff format --check .
- name: Test with pytest
run: |
pytest
12 changes: 6 additions & 6 deletions .github/workflows/python-app-macos-windows.yml
@@ -27,14 +27,14 @@ jobs:
shell: bash
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
ruff check .
- name: Check formatting with ruff
run: |
ruff format --check .
- name: Test with pytest - exclude MySQL integration tests
run: |
pytest --ignore "opendoors/tests/test_sql_db.py" --ignore "efiction/tests/test_metadata.py"
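
For context: `ruff check .` exits non-zero as soon as any lint violation is reported, so the single command replaces both halves of the old flake8 step (the hard failure on E9/F63/F7/F82 and the --exit-zero advisory pass). Unless a [tool.ruff] section in the repository overrides the defaults, ruff's default rule set already includes the E9 and pyflakes F checks, while complexity checking (mccabe C901, the old --max-complexity=10) is opt-in and no longer enforced here. A minimal local mirror of the new CI steps might look like the following sketch (it assumes ruff and pytest are installed in the current environment):

import subprocess
import sys

def run(cmd: list[str]) -> int:
    # echo the command the way CI logs do, then return its exit status
    print("+", " ".join(cmd))
    return subprocess.run(cmd).returncode

# `or` short-circuits on the first non-zero status, mirroring fail-fast CI:
# lint, then the formatting check, then the test suite
status = (
    run(["ruff", "check", "."])
    or run(["ruff", "format", "--check", "."])
    or run(["pytest"])
)
sys.exit(status)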
201 changes: 129 additions & 72 deletions efiction/chapters.py
@@ -7,23 +7,33 @@

from opendoors.mysql import SqlDb
from opendoors.big_insert import BigInsert
from opendoors.utils import get_full_path, normalize, print_progress, make_banner, key_find, get_prefixed_path
from opendoors.utils import (
get_full_path,
normalize,
print_progress,
make_banner,
key_find,
get_prefixed_path,
)


class EFictionChapters:
"""
Process chapter contents and move them into the Open Doors working database.
"""

def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb = None):
self.sql = sql
self.config = config
self.logger = logger
self.working_original = self.config['Processing']['simplified_original_db']
self.chapters_table = sql.read_table_to_dict(self.config['Processing']['simplified_original_db'], "chapters")
self.working_open_doors = self.config['Processing']['open_doors_working_db']
self.working_original = self.config["Processing"]["simplified_original_db"]
self.chapters_table = sql.read_table_to_dict(
self.config["Processing"]["simplified_original_db"], "chapters"
)
self.working_open_doors = self.config["Processing"]["open_doors_working_db"]

def _are_chapters_in_table(self) -> bool:
return len([c for c in self.chapters_table if c['storytext']]) > 0
return len([c for c in self.chapters_table if c["storytext"]]) > 0
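        # True when at least one row in the original chapters table carries
        # story text, i.e. the archive stored chapter bodies in the database
        # rather than as files on disk; load_chapters() branches on this below.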

@staticmethod
def __file_with_path(dirpath, subdir, filename):
@@ -35,9 +45,9 @@ def __file_with_path(dirpath, subdir, filename):
:return: A dict containing metadata about the chapter based on its file path
"""
return {
'path': os.path.join(dirpath, filename),
'chap_id': Path(filename).stem,
'author_id': subdir
"path": os.path.join(dirpath, filename),
"chap_id": Path(filename).stem,
"author_id": subdir,
}
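        # For illustration (hypothetical values): with dirpath
        # "archive/chapters/42", subdir "42" and filename "7.txt", this returns
        # {"path": "archive/chapters/42/7.txt", "chap_id": "7", "author_id": "42"},
        # i.e. chapter files are expected to live in one subdirectory per author.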

def load_chapter_text_into_db(self, chapter_paths: List[dict]):
@@ -49,33 +59,39 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]):
warnings = 0
forced_continue = False
self.logger.info("...loading data from chapters table...")
old_chapters, current, total = self.sql.read_table_with_total(self.working_original, "chapters")
old_chapters, current, total = self.sql.read_table_with_total(
self.working_original, "chapters"
)

self.logger.info("...removing rows from existing chapters table...")
self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;")

self.logger.info("...loading text from chapter files...")
insert_op = BigInsert(
self.working_open_doors,
"chapters",
["id", "position", "title", "text", "story_id", "notes"],
self.sql
)
self.working_open_doors,
"chapters",
["id", "position", "title", "text", "story_id", "notes"],
self.sql,
)
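        # BigInsert presumably accumulates addRow() calls and flushes them as
        # bulk INSERTs when send() is called at the end, which is why rows are
        # queued here instead of being executed one at a time.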
try:
encoding = self.config['Archive']['encoding']
encoding = self.config["Archive"]["encoding"]
except KeyError:
encoding = None
if encoding is None:
message_string = """
message_string = (
"""
You have not specified any character encoding in the config file!

If you are unsure which encoding is used in the backup
""".strip() + (
f""", please run the mojibake tool:
""".strip()
+ (
f""", please run the mojibake tool:

mojibake {self.config['Archive']['chapter_path']}

""" if shutil.which('mojibake') is not None else f"""
"""
if shutil.which("mojibake") is not None
else f"""
, you can install the
mojibake tool from its repository:

@@ -86,49 +102,64 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]):
mojibake {self.config['Archive']['chapter_path']}

""".strip()
)
)
)
print(message_string)
while encoding is None:
encoding_text = input("Enter a valid encoding (press enter for utf8): ")
if encoding_text == "":
encoding_text = 'utf8'
encoding_text = "utf8"
try:
# check if encoding is valid
''.encode(encoding_text)
"".encode(encoding_text)
encoding = encoding_text
except:
except LookupError:
print(f"{encoding_text} is not a valid encoding, try again")
for old_chapter in old_chapters:
chapid = old_chapter['chapid']
chapter = [chapter_path for chapter_path in chapter_paths if chapter_path['chap_id'] == str(chapid)]
chapid = old_chapter["chapid"]
chapter = [
chapter_path
for chapter_path in chapter_paths
if chapter_path["chap_id"] == str(chapid)
]
if chapter:
file = chapter[0]['path']
with open(file, 'rb') as raw_chapter:
file = chapter[0]["path"]
with open(file, "rb") as raw_chapter:
raw = raw_chapter.read()
while isinstance(raw, bytes):
try:
raw = raw.decode(encoding=encoding)
except UnicodeDecodeError as e:
error = f"Failed to decode {file}\n"
line_num = raw[:e.start].decode(encoding).count("\n")
line_num = raw[: e.start].decode(encoding).count("\n")
error += f"At line {line_num}:\t{str(e)}\n"
error += "--\t" + str(raw[max(e.start - 40, 0):e.end + 30]) + "\n"
error += (
"--\t"
+ str(raw[max(e.start - 40, 0) : e.end + 30])
+ "\n"
)
# print `^` under the offending byte
error += "\t" + \
" " * (len(str(raw[max(e.start - 40, 0):e.start])) - 1) + \
"^" * (len(str(raw[e.start:e.end])) - 3) + "\n"
error += (
"\t"
+ " "
* (len(str(raw[max(e.start - 40, 0) : e.start])) - 1)
+ "^" * (len(str(raw[e.start : e.end])) - 3)
+ "\n"
)
error += "Will be converted to:\n"
# remove the offending bytes (usually one)
raw = raw[:e.start] + raw[e.end:]
error += "++\t " + raw[
max(e.start - 40, 0):
e.end + 30
].decode(encoding, errors='ignore') \
.replace("\n", "\\n") \
.replace("\r", "\\r") + "\n"
raw = raw[: e.start] + raw[e.end :]
error += (
"++\t "
+ raw[max(e.start - 40, 0) : e.end + 30]
.decode(encoding, errors="ignore")
.replace("\n", "\\n")
.replace("\r", "\\r")
+ "\n"
)
self.logger.warning(error)
warnings += 1
if warnings > len(old_chapters) * .3 and not forced_continue:
if warnings > len(old_chapters) * 0.3 and not forced_continue:
msg = f"""
A total of {warnings} automatic modifications have been performed so far!

@@ -147,28 +178,35 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]):
raise Exception("Process aborted, too many errors!")
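                # Recap of the loop above: decoding retries until `raw` becomes
                # a str; each UnicodeDecodeError drops the offending bytes
                # (raw[e.start:e.end]), logs about 40 bytes of context with
                # carets under the bad span, and counts a warning. Aborting once
                # warnings pass 30% of the chapters guards against a wrong
                # encoding silently mangling most of the archive.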

text = normalize(raw)
if key_find('endnotes', old_chapter):
if key_find("endnotes", old_chapter):
text = text + f"\n\n\n<hr>\n{old_chapter['endnotes']}"

insert_op.addRow(
chapid,
old_chapter['inorder'],
old_chapter['title'],
old_chapter["inorder"],
old_chapter["title"],
text,
old_chapter['sid'],
old_chapter['notes']
old_chapter["sid"],
old_chapter["notes"],
)
current = print_progress(current, total, "chapters converted")
# If there were any errors, display a warning for the user to check the affected chapters
if warnings >= 1:
self.logger.warning("If the character deletion is unacceptable please quit this processor and use the mojibake tool,"
" then restart the processor from step 04")
self.logger.warning(
"If the character deletion is unacceptable please quit this processor and use the mojibake tool,"
" then restart the processor from step 04"
)
self.logger.error(
make_banner('-',
f"There were {warnings} warnings; check the affected chapters listed above to make sure curly quotes "
"and accented characters are correctly displayed."))
make_banner(
"-",
f"There were {warnings} warnings; check the affected chapters listed above to make sure curly quotes "
"and accented characters are correctly displayed.",
)
)
insert_op.send()
return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM chapters;")
return self.sql.execute_and_fetchall(
self.working_open_doors, "SELECT * FROM chapters;"
)

def list_chapter_files(self):
"""
@@ -177,10 +215,21 @@ def list_chapter_files(self):
"""
self.logger.info("Loading chapters from the filesystem...")
chapter_paths = []
for dirpath, dirnames, filenames in os.walk(get_full_path(self.config['Archive']['chapter_path'])):
for dirpath, dirnames, filenames in os.walk(
get_full_path(self.config["Archive"]["chapter_path"])
):
subdir = dirpath.split(os.path.sep)[-1]
if subdir and subdir != self.config['Archive']['chapter_path'].split(os.path.sep)[-1]:
chapter_paths.extend([self.__file_with_path(dirpath, subdir, filename) for filename in filenames])
if (
subdir
and subdir
!= self.config["Archive"]["chapter_path"].split(os.path.sep)[-1]
):
chapter_paths.extend(
[
self.__file_with_path(dirpath, subdir, filename)
for filename in filenames
]
)
return chapter_paths
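        # Files sitting directly in chapter_path itself are skipped; for
        # everything else the name of the containing directory is treated as
        # the author id, which assumes the one-folder-per-author layout that
        # __file_with_path documents.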

def load_og_chapters_into_db(self):
@@ -189,55 +238,63 @@ def load_og_chapters_into_db(self):
:return:
"""
self.logger.info("...loading data from chapters table...")
old_chapters, current, total = self.sql.read_table_with_total(self.working_original, "chapters")
old_chapters, current, total = self.sql.read_table_with_total(
self.working_original, "chapters"
)

self.logger.info("...removing rows from existing chapters table...")
self.sql.execute(self.working_open_doors, "TRUNCATE TABLE chapters;")

self.logger.info("...loading chapters from original chapters table...")
insert_op = BigInsert(
self.working_open_doors,
"chapters",
["id", "position", "title", "text", "story_id", "notes"],
self.sql
)
self.working_open_doors,
"chapters",
["id", "position", "title", "text", "story_id", "notes"],
self.sql,
)

for old_chapter in old_chapters:
text = normalize(old_chapter['storytext'])
if key_find('endnotes', old_chapter):
text = normalize(old_chapter["storytext"])
if key_find("endnotes", old_chapter):
text = text + f"\n\n\n<hr>\n{old_chapter['endnotes']}"

insert_op.addRow(
old_chapter['chapid'],
old_chapter['inorder'],
old_chapter['title'],
old_chapter["chapid"],
old_chapter["inorder"],
old_chapter["title"],
text,
old_chapter['sid'],
old_chapter['notes']
old_chapter["sid"],
old_chapter["notes"],
)

current = print_progress(current, total, "chapters converted")

insert_op.send()
return self.sql.execute_and_fetchall(self.working_open_doors, "SELECT * FROM chapters;")
return self.sql.execute_and_fetchall(
self.working_open_doors, "SELECT * FROM chapters;"
)

def load_chapters(self, step_path: str):
"""
Check if chapters are already present in the database and if not, load them from the filesystem
:return:
"""
if self._are_chapters_in_table():
self.logger.info("Chapters are already present in the original database, converting now")
self.logger.info(
"Chapters are already present in the original database, converting now"
)
self.load_og_chapters_into_db()
else:
if not self.config.has_option('Archive', 'chapter_path'):
if not self.config.has_option("Archive", "chapter_path"):
chapter_path = input("Full path to chapter files\n>> ")
self.config['Archive']['chapter_path'] = os.path.normpath(chapter_path)
self.config["Archive"]["chapter_path"] = os.path.normpath(chapter_path)

chapter_paths = self.list_chapter_files()
self.load_chapter_text_into_db(chapter_paths)

database_dump = get_prefixed_path("04", step_path, f"{self.working_open_doors}.sql")
database_dump = get_prefixed_path(
"04", step_path, f"{self.working_open_doors}.sql"
)
self.logger.info(f"Exporting converted tables to {database_dump}...")
self.sql.dump_database(self.working_open_doors, database_dump)
return True