Merge branch 'master' into use-pyproject-toml

kha-white · Jun 21, 2024 · a614b17
2 parents 7a8445c + 965d0f6
commit a614b17
Show file tree

Hide file tree

Showing 20 changed files with 586 additions and 363 deletions.
diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
@@ -0,0 +1,10 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
diff --git a/manga_ocr/__main__.py b/manga_ocr/__main__.py
@@ -7,5 +7,5 @@ def main():
     fire.Fire(run)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/manga_ocr/_version.py b/manga_ocr/_version.py
@@ -1 +1 @@
-__version__ = "0.1.11"
+__version__ = "0.1.11"
diff --git a/manga_ocr/ocr.py b/manga_ocr/ocr.py
@@ -9,37 +9,45 @@
 
 
 class MangaOcr:
-    def __init__(self, pretrained_model_name_or_path='kha-white/manga-ocr-base', force_cpu=False):
-        logger.info(f'Loading OCR model from {pretrained_model_name_or_path}')
-        self.processor = ViTImageProcessor.from_pretrained(pretrained_model_name_or_path)
+    def __init__(
+        self, pretrained_model_name_or_path="kha-white/manga-ocr-base", force_cpu=False
+    ):
+        logger.info(f"Loading OCR model from {pretrained_model_name_or_path}")
+        self.processor = ViTImageProcessor.from_pretrained(
+            pretrained_model_name_or_path
+        )
         self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
-        self.model = VisionEncoderDecoderModel.from_pretrained(pretrained_model_name_or_path)
+        self.model = VisionEncoderDecoderModel.from_pretrained(
+            pretrained_model_name_or_path
+        )
 
         if not force_cpu and torch.cuda.is_available():
-            logger.info('Using CUDA')
+            logger.info("Using CUDA")
             self.model.cuda()
         elif not force_cpu and torch.backends.mps.is_available():
-            logger.info('Using MPS')
-            self.model.to('mps')
+            logger.info("Using MPS")
+            self.model.to("mps")
         else:
-            logger.info('Using CPU')
+            logger.info("Using CPU")
 
-        example_path = Path(__file__).parent / 'assets/example.jpg'
+        example_path = Path(__file__).parent / "assets/example.jpg"
         if not example_path.is_file():
-            raise FileNotFoundError(f'Missing example image {example_path}')
+            raise FileNotFoundError(f"Missing example image {example_path}")
         self(example_path)
 
-        logger.info('OCR ready')
+        logger.info("OCR ready")
 
     def __call__(self, img_or_path):
         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
             img = Image.open(img_or_path)
         elif isinstance(img_or_path, Image.Image):
             img = img_or_path
         else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+            raise ValueError(
+                f"img_or_path must be a path or PIL.Image, instead got: {img_or_path}"
+            )
 
-        img = img.convert('L').convert('RGB')
+        img = img.convert("L").convert("RGB")
 
         x = self._preprocess(img)
         x = self.model.generate(x[None].to(self.model.device), max_length=300)[0].cpu()
@@ -53,9 +61,9 @@ def _preprocess(self, img):
 
 
 def post_process(text):
-    text = ''.join(text.split())
-    text = text.replace('…', '...')
-    text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
+    text = "".join(text.split())
+    text = text.replace("…", "...")
+    text = re.sub("[・.]{2,}", lambda x: (x.end() - x.start()) * ".", text)
     text = jaconv.h2z(text, ascii=True, digit=True)
 
     return text
diff --git a/manga_ocr/run.py b/manga_ocr/run.py
@@ -27,30 +27,33 @@ def process_and_write_results(mocr, img_or_path, write_to):
     text = mocr(img_or_path)
     t1 = time.time()
 
-    logger.info(f'Text recognized in {t1 - t0:0.03f} s: {text}')
+    logger.info(f"Text recognized in {t1 - t0:0.03f} s: {text}")
 
-    if write_to == 'clipboard':
+    if write_to == "clipboard":
         pyperclip.copy(text)
     else:
         write_to = Path(write_to)
-        if write_to.suffix != '.txt':
-            raise ValueError('write_to must be either "clipboard" or a path to a text file')
+        if write_to.suffix != ".txt":
+            raise ValueError(
+                'write_to must be either "clipboard" or a path to a text file'
+            )
 
-        with write_to.open('a', encoding="utf-8") as f:
-            f.write(text + '\n')
+        with write_to.open("a", encoding="utf-8") as f:
+            f.write(text + "\n")
 
 
 def get_path_key(path):
     return path, path.lstat().st_mtime
 
 
-def run(read_from='clipboard',
-        write_to='clipboard',
-        pretrained_model_name_or_path='kha-white/manga-ocr-base',
-        force_cpu=False,
-        delay_secs=0.1,
-        verbose=False
-        ):
+def run(
+    read_from="clipboard",
+    write_to="clipboard",
+    pretrained_model_name_or_path="kha-white/manga-ocr-base",
+    force_cpu=False,
+    delay_secs=0.1,
+    verbose=False,
+):
     """
     Run OCR in the background, waiting for new images to appear either in system clipboard, or a directory.
     Recognized texts can be either saved to system clipboard, or appended to a text file.
@@ -65,21 +68,25 @@ def run(read_from='clipboard',
 
     mocr = MangaOcr(pretrained_model_name_or_path, force_cpu)
 
-    if sys.platform not in ('darwin', 'win32') and write_to == 'clipboard':
+    if sys.platform not in ("darwin", "win32") and write_to == "clipboard":
         # Check if the system is using Wayland
         import os
-        if os.environ.get('WAYLAND_DISPLAY'):
+
+        if os.environ.get("WAYLAND_DISPLAY"):
             # Check if the wl-clipboard package is installed
             if os.system("which wl-copy > /dev/null") == 0:
                 pyperclip.set_clipboard("wl-clipboard")
             else:
-                msg = 'Your session uses wayland and does not have wl-clipboard installed. ' \
-                    'Install wl-clipboard for write in clipboard to work.'
+                msg = (
+                    "Your session uses wayland and does not have wl-clipboard installed. "
+                    "Install wl-clipboard for write in clipboard to work."
+                )
                 raise NotImplementedError(msg)
 
-    if read_from == 'clipboard':
+    if read_from == "clipboard":
         from PIL import ImageGrab
-        logger.info('Reading from clipboard')
+
+        logger.info("Reading from clipboard")
 
         img = None
         while True:
@@ -95,19 +102,25 @@ def run(read_from='clipboard',
                     # Pillow error when clipboard contains text (Linux, X11)
                     pass
                 else:
-                    logger.warning('Error while reading from clipboard ({})'.format(error))
+                    logger.warning(
+                        "Error while reading from clipboard ({})".format(error)
+                    )
             else:
-                if isinstance(img, Image.Image) and not are_images_identical(img, old_img):
+                if isinstance(img, Image.Image) and not are_images_identical(
+                    img, old_img
+                ):
                     process_and_write_results(mocr, img, write_to)
 
             time.sleep(delay_secs)
 
     else:
         read_from = Path(read_from)
         if not read_from.is_dir():
-            raise ValueError('read_from must be either "clipboard" or a path to a directory')
+            raise ValueError(
+                'read_from must be either "clipboard" or a path to a directory'
+            )
 
-        logger.info(f'Reading from directory {read_from}')
+        logger.info(f"Reading from directory {read_from}")
 
         old_paths = set()
         for path in read_from.iterdir():
@@ -123,12 +136,12 @@ def run(read_from='clipboard',
                         img = Image.open(path)
                         img.load()
                     except (UnidentifiedImageError, OSError) as e:
-                        logger.warning(f'Error while reading file {path}: {e}')
+                        logger.warning(f"Error while reading file {path}: {e}")
                     else:
                         process_and_write_results(mocr, img, write_to)
 
             time.sleep(delay_secs)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     fire.Fire(run)
diff --git a/manga_ocr_dev/data/generate_backgrounds.py b/manga_ocr_dev/data/generate_backgrounds.py
@@ -47,8 +47,8 @@ def find_rectangle(mask, y, x, aspect_ratio_range=(0.33, 3.0)):
 
 
 def generate_backgrounds(crops_per_page=5, min_size=40):
-    data = pd.read_csv(MANGA109_ROOT / 'data.csv')
-    frames_df = pd.read_csv(MANGA109_ROOT / 'frames.csv')
+    data = pd.read_csv(MANGA109_ROOT / "data.csv")
+    frames_df = pd.read_csv(MANGA109_ROOT / "frames.csv")
 
     BACKGROUND_DIR.mkdir(parents=True, exist_ok=True)
 
@@ -57,11 +57,11 @@ def generate_backgrounds(crops_per_page=5, min_size=40):
         page = cv2.imread(str(MANGA109_ROOT / page_path))
         mask = np.zeros((page.shape[0], page.shape[1]), dtype=bool)
         for row in data[data.page_path == page_path].itertuples():
-            mask[row.ymin:row.ymax, row.xmin:row.xmax] = True
+            mask[row.ymin : row.ymax, row.xmin : row.xmax] = True
 
         frames_mask = np.zeros((page.shape[0], page.shape[1]), dtype=bool)
         for row in frames_df[frames_df.page_path == page_path].itertuples():
-            frames_mask[row.ymin:row.ymax, row.xmin:row.xmax] = True
+            frames_mask[row.ymin : row.ymax, row.xmin : row.xmax] = True
 
         mask = mask | ~frames_mask
 
@@ -76,10 +76,12 @@ def generate_backgrounds(crops_per_page=5, min_size=40):
             crop = page[ymin:ymax, xmin:xmax]
 
             if crop.shape[0] >= min_size and crop.shape[1] >= min_size:
-                out_filename = '_'.join(
-                    Path(page_path).with_suffix('').parts[-2:]) + f'_{ymin}_{ymax}_{xmin}_{xmax}.png'
+                out_filename = (
+                    "_".join(Path(page_path).with_suffix("").parts[-2:])
+                    + f"_{ymin}_{ymax}_{xmin}_{xmax}.png"
+                )
                 cv2.imwrite(str(BACKGROUND_DIR / out_filename), crop)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     generate_backgrounds()