Fix most of the Pylint issues

ilyannn · Jan 10, 2024 · eeb6fbc · eeb6fbc
1 parent e524e40
commit eeb6fbc
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 20 deletions.
diff --git a/redact.py b/redact.py
@@ -5,7 +5,7 @@
 from collections import deque
 from glob import glob
 from os import makedirs
-from os.path import abspath, commonpath, dirname, join, relpath, splitext, isfile
+from os.path import abspath, commonpath, dirname, isfile, join, relpath, splitext
 from typing import Optional, Tuple
 from urllib.parse import urlparse
 
@@ -22,18 +22,14 @@
 tokenizer = tiktoken.encoding_for_model("gpt-4")
 
 
-def token_length(value):
-    return len(tokenizer.encode(value))
-
-
 def as_file_destination(dest: str, source: str, base: str) -> Optional[str]:
     """
     Compute a destination path to a file, relative to a base directory.
 
     :param dest: The destination path to convert, e.g. '../some/path/to/file'
     :param source: The source path to derive the base directory from, e.g. '/home/folder'
     :param base: The base directory for all the directories, e.g. '/home'
-    :return: The converted file path relative to `base`, or None if the file is outside the base directory.
+    :return: Converted file path relative to `base`; None if the file is outside the base directory
     """
     try:
         url = urlparse(dest)
@@ -79,8 +75,8 @@ def is_a_secret(key, value):
     - a sequence with a digit
     - of length at least 10
     - which is either
-       - hinted with the key containing one of two strings
-       - or is part of the value separated by the whitespace which looks random for ChatGPT tokenizer
+       - hinted at by the key containing one of two strings, OR
+       - a whitespace-separated part of the value that looks random to the ChatGPT tokenizer
     """
     key = key.lower()
     return (
@@ -93,11 +89,13 @@ def is_a_secret(key, value):
 def redact_text(text, file_ext) -> Tuple[str, int]:
     """
     :param text: The input text to be redacted. It can be a multiline string.
-    :param file_ext: The file extension specifying the format of the input text (e.g., '.yaml', '.txt').
+    :param file_ext: The file extension for the format of the input text (e.g., '.yaml', '.txt').
     :return: A tuple containing the redacted text (str) and the count of redacted lines (int).
 
-    This method iterates through the key/value pairs (according to the rules specific for the given file extension) of
-    the input text and redacts anything that looks like sensitive information found in the values with "REDACTED".
+    This method iterates through the key/value pairs (according to the rules specific for the given
+    file extension) of the input text and redacts anything that looks like sensitive information
+    found in the values with "REDACTED".
+
     The resulting text is returned along with the count of redacted values.
     """
     count_redacted = 0
@@ -142,11 +140,11 @@ def create_and_write(out_dir, filename, text):
     """
     output_file = join(out_dir, filename)
     makedirs(dirname(output_file), exist_ok=True)
-    with open(output_file, "wt") as output_stream:
+    with open(output_file, "wt", encoding="utf-8") as output_stream:
         output_stream.write(text)
 
 
-class ProcessingMessage(object):
+class ProcessingMessage:
     """A class for producing nicely formatted "processing xxx... done" messages."""
 
     def __init__(self, file_name):
@@ -160,9 +158,9 @@ def __enter__(self):
         click.echo(message, nl=False)
         return self
 
-    def __exit__(self, type, value, traceback):
+    def __exit__(self, exception_type, value, traceback):
         message = (
-            click.style("error", fg="red") if type else click.style("done", "green")
+            click.style("error", fg="red") if exception_type else click.style("done", "green")
         )
         click.echo(message)
 
@@ -184,7 +182,7 @@ def redact(in_dir, out_dir):
     for md_file in glob("**/*.md", recursive=True, root_dir=in_dir):
         with ProcessingMessage(md_file):
             found_links = 0
-            with open(join(in_dir, md_file), "rt") as input_stream:
+            with open(join(in_dir, md_file), "rt", encoding="utf-8") as input_stream:
                 text = input_stream.read()
             document = markdown.parse(text)
             queue = deque([document])
@@ -207,7 +205,7 @@ def redact(in_dir, out_dir):
             with ProcessingMessage(value_file):
                 fullname = join(in_dir, value_file)
                 if isfile(fullname):
-                    with open(fullname, "rt") as input_stream:
+                    with open(fullname, "rt", encoding="utf-8") as input_stream:
                         text = input_stream.read()
                     out_text, found_secrets = redact_text(text, splitext(value_file)[1])
                     click.echo(

diff --git a/to_markdown.py b/to_markdown.py
@@ -2,7 +2,7 @@
 """
 Convert all files into Markdown format.
 
-Activates syntax highlighting for all code and add a preamble to every file, including Markdown files.
+Activates syntax highlighting for all code and adds a preamble to every file, including Markdown.
 This is the format consumed by the Zola Static Site Generator.
 """
 
@@ -11,7 +11,6 @@
 from typing import Iterable
 
 import click
-
 from redact import ProcessingMessage, create_and_write
 
 LANGUAGE_BY_EXTENSION = {
@@ -22,10 +21,12 @@
 
 
 def zola_preamble(title) -> Iterable[str]:
+    """Return the three preamble lines (`+++`, the title, `+++`) in the form Zola expects."""
     return "+++", f"  title = '{title}'", "+++"
 
 
 def md_code(lang, text) -> Iterable[str]:
+    """Return the lines of a fenced Markdown code block tagged with `lang`."""
     return f"```{lang}", text, "```"
 
 
@@ -48,7 +49,7 @@ def to_markdown(in_dir, out_dir):
                 _, split2 = splitext(filename)
                 ext = split2.lower()[1:] if split2 and split2[0] == "." else ""
 
-                with open(fullname, "rt") as input_stream:
+                with open(fullname, "rt", encoding="utf-8") as input_stream:
                     text = input_stream.read()
 
                 if ext == "md":