Skip to content

Commit

Permalink
Merge pull request #48 from hnesk/performance
Browse files Browse the repository at this point in the history
Performance improvements
  • Loading branch information
hnesk authored Jun 15, 2022
2 parents 7430ca1 + c17a3b9 commit c84e55c
Show file tree
Hide file tree
Showing 16 changed files with 109 additions and 63 deletions.
10 changes: 7 additions & 3 deletions ocrd_browser/application.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from gi.repository import Gio, Gtk, GLib, Gdk

import pkg_resources
from typing import List

from ocrd_browser.util.gtk import ActionRegistry
from ocrd_browser.ui import MainWindow, AboutDialog, OpenDialog
from ocrd_browser.view import ViewRegistry

try:
from importlib.metadata import entry_points
except ModuleNotFoundError:
from importlib_metadata import entry_points # type: ignore


class OcrdBrowserApplication(Gtk.Application):
# TODO: Parse arguments (with Gtk or click) to open certain views by mets+page_id+view(view_configuration_dict) and deep filename e.g.
Expand Down Expand Up @@ -36,7 +40,7 @@ def do_startup(self) -> None:
self.set_accels_for_action('view.zoom_to::width', ['<Ctrl>numbersign'])
self.set_accels_for_action('view.zoom_to::page', ['<Ctrl><Alt>numbersign'])

for entry_point in pkg_resources.iter_entry_points('ocrd_browser_ext'):
for entry_point in entry_points().get('ocrd_browser_ext', []):
(entry_point.load())(self)

self.load_css()
Expand Down Expand Up @@ -65,7 +69,7 @@ def on_quit(self, _action: Gio.SimpleAction, _param: str = None) -> None:
open_windows: int = 0
window: MainWindow
for window in self.get_windows():
if isinstance(window, MainWindow) and window.close_confirm(): # type: ignore[unreachable]
if isinstance(window, MainWindow) and window.close_confirm():
window.destroy()
else:
open_windows += 1
Expand Down
21 changes: 20 additions & 1 deletion ocrd_browser/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#!/usr/bin/env python3
# -*- Mode: Python; coding: utf-8; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*-
import os
import sys
import pstats
import io
import cProfile

import gi

Expand All @@ -13,7 +17,7 @@

gi.require_version('WebKit2', '4.0')

from gi.repository import Gtk, Gio # noqa: E402
from gi.repository import Gtk, Gio, GLib # noqa: E402
from pathlib import Path # noqa: E402
from typing import Type # noqa: E402
from types import TracebackType # noqa: E402
Expand All @@ -22,6 +26,11 @@
# Load the compiled GResource bundle "ui.gresource" found under BASE_PATH and
# register it with Gio, which makes its contents available process-wide.
# NOTE(review): BASE_PATH is defined in a part of the file not shown here.
resources = Gio.resource_load(str(BASE_PATH / "ui.gresource"))
Gio.resources_register(resources)

# Opt-in startup profiling: a cProfile.Profile is created and started at import
# time only when the STARTUP_PROFILE environment variable is present (any value);
# otherwise PROFILER stays None.
PROFILER = cProfile.Profile() if 'STARTUP_PROFILE' in os.environ else None
if PROFILER is not None:
    PROFILER.enable()


def install_excepthook() -> None:
""" Make sure we exit when an unhandled exception occurs. """
Expand All @@ -36,7 +45,17 @@ def new_hook(type_: Type[BaseException], value: BaseException, traceback: Traceb
sys.excepthook = new_hook


def startup_time() -> None:
    """
    Stop the startup profiler and print its 20 most expensive entries.

    The statistics are sorted by ``pstats.SortKey.TIME`` and written to stdout.
    ``main()`` schedules this function through ``GLib.idle_add`` when profiling
    is enabled.

    Does nothing when profiling was not enabled (``PROFILER`` is ``None``), so a
    call outside the guarded path in ``main()`` cannot raise ``AttributeError``.
    """
    if PROFILER is None:
        return
    PROFILER.disable()
    # Render the report into a buffer first so it is emitted with a single print
    s = io.StringIO()
    ps = pstats.Stats(PROFILER, stream=s).sort_stats(pstats.SortKey.TIME)
    ps.print_stats(20)
    print(s.getvalue())


def main() -> None:
if PROFILER:
GLib.idle_add(startup_time)
from ocrd_utils import initLogging
initLogging()
from ocrd_browser.application import OcrdBrowserApplication
Expand Down
34 changes: 19 additions & 15 deletions ocrd_browser/model/document.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,39 @@
from __future__ import annotations
from typing import Optional, Tuple, List, Set, Union, cast, Callable, Any, Dict, TYPE_CHECKING

import atexit
import errno
import os
import shutil
from functools import wraps

from ocrd import Workspace, Resolver
from ocrd import Resolver
from ocrd_browser.model.page import Page
from ocrd_browser.util.file_groups import best_file_group
from ocrd_browser.util.image import add_dpi_to_png_buffer
from ocrd_browser.util.streams import SilencedStreams
from ocrd_modelfactory import page_from_file
from ocrd_models import OcrdFile
from ocrd_models.ocrd_page_generateds import PcGtsType
from ocrd_models.constants import NAMESPACES as NS
from ocrd_models import OcrdFile
from ocrd_utils import pushd_popd
from ocrd_utils.constants import MIME_TO_EXT
from ocrd_utils import getLogger

from typing import Optional, Tuple, List, Set, Union, cast, Callable, Any, Dict
from collections import OrderedDict
from pathlib import Path
from tempfile import mkdtemp
from datetime import datetime
from urllib.parse import urlparse, unquote
# noinspection PyProtectedMember
from lxml.etree import ElementBase as Element, _ElementTree as ElementTree

from numpy import array as ndarray
from PIL import Image

import cv2

if TYPE_CHECKING:
from ocrd import Workspace
from ocrd_models.ocrd_page_generateds import PcGtsType
# noinspection PyProtectedMember
from lxml.etree import ElementBase as Element, _ElementTree as ElementTree
from numpy import array as ndarray

EventCallBack = Optional[Callable[[str, Any], None]]


Expand Down Expand Up @@ -60,11 +63,11 @@ def __init__(self, workspace: Optional[Workspace], emitter: Optional[EventCallBa
os.chdir(self.workspace.directory)

@classmethod
def create(cls, emitter: EventCallBack = None) -> 'Document':
def create(cls, emitter: EventCallBack = None) -> Document:
return cls(None, emitter=emitter)

@classmethod
def load(cls, mets_url: Union[Path, str] = None, emitter: EventCallBack = None) -> 'Document':
def load(cls, mets_url: Union[Path, str] = None, emitter: EventCallBack = None) -> Document:
"""
Load a project from a URL as a read-only view
Expand All @@ -80,7 +83,7 @@ def load(cls, mets_url: Union[Path, str] = None, emitter: EventCallBack = None)
return doc

@classmethod
def clone(cls, mets_url: Union[Path, str], emitter: EventCallBack = None, editable: bool = True) -> 'Document':
def clone(cls, mets_url: Union[Path, str], emitter: EventCallBack = None, editable: bool = True) -> Document:
"""
Clones a project (mets.xml and all used files) to a temporary directory for editing
"""
Expand Down Expand Up @@ -159,7 +162,7 @@ def baseurl_mets(self) -> str:
"""
return str(self.workspace.baseurl) + '/' + self.mets_filename if self.workspace else None

def path(self, other: Union[OcrdFile, Path, str]) -> Path:
def path(self, other: Union[OcrdFile, Path, str]) -> Optional[Path]:
"""
Resolves other relative to current workspace
"""
Expand Down Expand Up @@ -222,7 +225,7 @@ def file_groups_and_mimetypes(self) -> List[Tuple[str, str]]:
@return: List[Tuple[str,str]]
"""
distinct_groups: OrderedDict[Tuple[str, str], None] = OrderedDict()
distinct_groups: Dict[Tuple[str, str], None] = {}
for el in self.xpath('mets:fileSec/mets:fileGrp[@USE]/mets:file[@MIMETYPE]'):
distinct_groups[(el.getparent().get('USE'), el.get('MIMETYPE'))] = None

Expand Down Expand Up @@ -407,7 +410,7 @@ def delete_page(self, page_id: str) -> None:

@check_editable
def add_image(self, image: ndarray, page_id: str, file_id: str, file_group: str = 'OCR-D-IMG', dpi: int = 300,
mimetype: str = 'image/png') -> 'OcrdFile':
mimetype: str = 'image/png') -> OcrdFile:
extension = MIME_TO_EXT[mimetype]
retval, image_array = cv2.imencode(extension, image)
image_bytes = add_dpi_to_png_buffer(image_array.tostring(), dpi)
Expand Down Expand Up @@ -442,6 +445,7 @@ def editable(self, editable: bool) -> None:
if self._original_url:
self.workspace = self._clone_workspace(self._original_url)
else:
# noinspection PyTypeChecker
self.workspace = Resolver().workspace_from_nothing(directory=None, mets_basename='mets.xml')
else:
self.workspace = Resolver().workspace_from_url(self.baseurl_mets)
Expand Down
1 change: 1 addition & 0 deletions ocrd_browser/model/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def file(self) -> Optional[OcrdFile]:
elif self.image_files:
return next(iter(self.image_files))
else:
# noinspection PyTypeChecker
any_files = self.get_files(mimetype=None)
if any_files:
return next(iter(any_files))
Expand Down
9 changes: 5 additions & 4 deletions ocrd_browser/ui/dialogs.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from gi.repository import Gtk, GdkPixbuf
from typing import Any
from pkg_resources import resource_filename

from ocrd_browser.util.gtk import resource_string
from ocrd_browser import __version__
from ocrd_browser.model import Document


@Gtk.Template(filename=resource_filename(__name__, '../resources/about-dialog.ui'))
@Gtk.Template(string=resource_string('about-dialog.ui'))
class AboutDialog(Gtk.AboutDialog):
__gtype_name__ = "AboutDialog"

Expand All @@ -16,7 +17,7 @@ def __init__(self, **kwargs: Any):
self.set_version(__version__)


@Gtk.Template(filename=resource_filename(__name__, '../resources/open-dialog.ui'))
@Gtk.Template(string=resource_string('open-dialog.ui'))
class OpenDialog(Gtk.FileChooserDialog):
__gtype_name__ = "OpenDialog"

Expand All @@ -36,7 +37,7 @@ def __init__(self, **kwargs: Any):
self.add_filter(filter_any)


@Gtk.Template(filename=resource_filename(__name__, '../resources/save-dialog.ui'))
@Gtk.Template(string=resource_string('save-dialog.ui'))
class SaveDialog(Gtk.FileChooserDialog):
__gtype_name__ = "SaveDialog"

Expand Down
4 changes: 2 additions & 2 deletions ocrd_browser/ui/page_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from typing import List, Callable, Optional, Any, cast

from pkg_resources import resource_filename
from ocrd_browser.util.gtk import resource_string
from ocrd_browser.model import Document
from .page_store import PageListStore, ChangeList


@Gtk.Template(filename=resource_filename(__name__, '../resources/page-list.ui'))
@Gtk.Template(string=resource_string('page-list.ui'))
class PagePreviewList(Gtk.IconView):
__gtype_name__ = "PagePreviewList"

Expand Down
6 changes: 2 additions & 4 deletions ocrd_browser/ui/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@

from ocrd_browser.model import Document
from ocrd_browser.view import ViewRegistry, ViewPage
from ocrd_browser.util.gtk import ActionRegistry
from ocrd_browser.util.gtk import ActionRegistry, resource_string
from .dialogs import SaveDialog, SaveChangesDialog
from .page_browser import PagePreviewList
from pkg_resources import resource_filename
from typing import List, cast, Any, Optional

from ..view.manager import ViewManager


@Gtk.Template(filename=resource_filename(__name__, '../resources/main-window.ui'))
@Gtk.Template(string=resource_string('main-window.ui'))
class MainWindow(Gtk.ApplicationWindow):
__gtype_name__ = "MainWindow"

header_bar: Gtk.HeaderBar = Gtk.Template.Child()
page_list_scroller: Gtk.ScrolledWindow = Gtk.Template.Child()
panes: Gtk.Paned = Gtk.Template.Child()
current_page_label: Gtk.Label = Gtk.Template.Child()
view_container: Gtk.Box = Gtk.Template.Child()
view_menu_box: Gtk.Box = Gtk.Template.Child()
Expand Down
16 changes: 8 additions & 8 deletions ocrd_browser/util/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import annotations
import os
from collections import OrderedDict
from configparser import ConfigParser
import shlex
from typing import List, Optional, MutableMapping
Expand All @@ -13,7 +13,7 @@

class _SubSettings:
@classmethod
def from_section(cls, _name: str, section: ConfigDict) -> '_SubSettings':
def from_section(cls, _name: str, section: ConfigDict) -> _SubSettings:
raise NotImplementedError('please override from_section')

def validate(self) -> None:
Expand All @@ -28,7 +28,7 @@ def __init__(self, preferred_images: List[str]):
self.preferred_images = preferred_images

@classmethod
def from_section(cls, _name: str, section: ConfigDict) -> '_FileGroups':
def from_section(cls, _name: str, section: ConfigDict) -> _FileGroups:
preferred_images = section.get('preferredImages', 'OCR-D-IMG, OCR-D-IMG.*')
return cls(
[grp.strip() for grp in preferred_images.split(',')]
Expand All @@ -50,7 +50,7 @@ def validate(self) -> None:
raise ValueError('Could not locate executable "{}"'.format(executable))

@classmethod
def from_section(cls, name: str, section: ConfigDict) -> '_Tool':
def from_section(cls, name: str, section: ConfigDict) -> _Tool:
return cls(
name[len(cls.PREFIX):],
section['commandline'],
Expand All @@ -66,7 +66,7 @@ def __init__(self, config: ConfigParser, validate: bool = True):
self.file_groups = _FileGroups.from_section('FileGroups', config['FileGroups'] if 'FileGroups' in config else {'': ''})
if validate:
self.file_groups.validate()
self.tools = OrderedDict()
self.tools = {}
for name, section in config.items():
if name.startswith(_Tool.PREFIX):
tool = _Tool.from_section(name, section)
Expand All @@ -78,13 +78,13 @@ def __repr__(self) -> str:
return '{}({})'.format(self.__class__.__name__, repr(vars(self)))

@classmethod
def get(cls) -> 'Settings':
def get(cls) -> Settings:
if cls._settings is None:
cls._settings = Settings.build_default()
return cls._settings

@classmethod
def build_default(cls, config_dirs: Optional[List[str]] = None, validate: bool = True) -> 'Settings':
def build_default(cls, config_dirs: Optional[List[str]] = None, validate: bool = True) -> Settings:
if config_dirs is None:
config_dirs = GLib.get_system_config_dirs() + [GLib.get_user_config_dir()]
try:
Expand All @@ -97,7 +97,7 @@ def build_default(cls, config_dirs: Optional[List[str]] = None, validate: bool =
return cls.build_from_files(config_files, validate)

@classmethod
def build_from_files(cls, files: List[str], validate: bool = True) -> 'Settings':
def build_from_files(cls, files: List[str], validate: bool = True) -> Settings:
log = getLogger('ocrd_browser.util.config._Settings.build_from_files')
config = ConfigParser()
setattr(config, 'optionxform', lambda option: option)
Expand Down
15 changes: 13 additions & 2 deletions ocrd_browser/util/gtk.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from __future__ import annotations
from gi.repository import Gio, GLib, Gtk

from typing import Callable, Dict, Optional, Set, Any

try:
from importlib.resources import read_text
except ModuleNotFoundError:
from importlib_resources import read_text # type: ignore


ActionCallback = Optional[Callable[[Gio.SimpleAction, Any], None]]


Expand Down Expand Up @@ -70,14 +77,14 @@ class WhenIdle:
Usage: see WhenIdle.call
"""
_instance: 'WhenIdle' = None
_instance: WhenIdle = None

def __init__(self, runner_callback: Callable): # type: ignore[type-arg]
self._runner_callback = runner_callback
self._callbacks: Set[Callback] = set()

@classmethod
def instance(cls) -> 'WhenIdle':
def instance(cls) -> WhenIdle:
if cls._instance is None:
cls._instance = cls(GLib.idle_add)
return cls._instance
Expand All @@ -102,3 +109,7 @@ def _run(self) -> None:
callback()
self._callbacks.remove(callback)
self._runner_callback(self._run)


def resource_string(resource: str, package: str = 'ocrd_browser.resources') -> str:
    """
    Read a text resource that is bundled inside a package

    @param resource: name of the resource file inside ``package``
    @param package: dotted name of the package that contains the resource
    @return: the content of the resource as text
    """
    content: str = read_text(package, resource)
    return content
2 changes: 1 addition & 1 deletion ocrd_browser/view/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from .base import View
from .registry import ViewRegistry
from .html import ViewHtml
from .images import ViewImages
from .text import ViewText
from .xml import ViewXml
from .empty import ViewEmpty
from .diff import ViewDiff
from .page import ViewPage
from .registry import ViewRegistry


__all__ = ['View', 'ViewRegistry', 'ViewImages', 'ViewText', 'ViewXml', 'ViewHtml', 'ViewEmpty', 'ViewDiff', 'ViewPage']
Loading

0 comments on commit c84e55c

Please sign in to comment.