From 8fb3b2d4a6eb09342bccdea003f2e813573411a5 Mon Sep 17 00:00:00 2001 From: Saeed Rasooli Date: Tue, 31 Dec 2024 08:16:23 +0330 Subject: [PATCH] break up plugins --- pyglossary/plugins/aard2_slob/__init__.py | 393 +---------- pyglossary/plugins/aard2_slob/reader.py | 145 ++++ pyglossary/plugins/aard2_slob/tags.py | 29 + pyglossary/plugins/aard2_slob/writer.py | 260 +++++++ pyglossary/plugins/almaany/__init__.py | 84 +-- pyglossary/plugins/almaany/reader.py | 88 +++ .../plugins/ayandict_sqlite/__init__.py | 206 +----- pyglossary/plugins/ayandict_sqlite/reader.py | 66 ++ pyglossary/plugins/ayandict_sqlite/writer.py | 152 ++++ pyglossary/plugins/cc_kedict/__init__.py | 304 +------- pyglossary/plugins/cc_kedict/reader.py | 309 +++++++++ pyglossary/plugins/crawler_dir/__init__.py | 163 +---- pyglossary/plugins/crawler_dir/reader.py | 88 +++ pyglossary/plugins/crawler_dir/writer.py | 93 +++ pyglossary/plugins/csv_plugin/__init__.py | 244 +------ pyglossary/plugins/csv_plugin/reader.py | 182 +++++ pyglossary/plugins/csv_plugin/writer.py | 121 ++++ pyglossary/plugins/dicformids/__init__.py | 256 +------ pyglossary/plugins/dicformids/reader.py | 76 ++ pyglossary/plugins/dicformids/writer.py | 195 ++++++ pyglossary/plugins/dict_cc/__init__.py | 200 +----- pyglossary/plugins/dict_cc/reader.py | 205 ++++++ pyglossary/plugins/dict_cc_split/__init__.py | 77 +- pyglossary/plugins/dict_cc_split/reader.py | 83 +++ pyglossary/plugins/dict_org/__init__.py | 156 +---- pyglossary/plugins/dict_org/reader.py | 74 ++ pyglossary/plugins/dict_org/writer.py | 98 +++ .../plugins/dict_org_source/__init__.py | 40 +- pyglossary/plugins/dict_org_source/writer.py | 42 ++ pyglossary/plugins/dictunformat/__init__.py | 90 +-- pyglossary/plugins/dictunformat/reader.py | 90 +++ pyglossary/plugins/digitalnk/__init__.py | 55 +- pyglossary/plugins/digitalnk/reader.py | 59 ++ pyglossary/plugins/dikt_json/__init__.py | 74 +- pyglossary/plugins/dikt_json/writer.py | 80 +++ pyglossary/plugins/ebook_epub2/__init__.py | 231 +----- pyglossary/plugins/ebook_epub2/writer.py | 233 +++++++ pyglossary/plugins/ebook_kobo/__init__.py | 229 +----- pyglossary/plugins/ebook_kobo/writer.py | 233 +++++++ .../plugins/ebook_kobo_dictfile/__init__.py | 185 +---- .../plugins/ebook_kobo_dictfile/reader.py | 123 ++++ .../plugins/ebook_kobo_dictfile/writer.py | 89 +++ pyglossary/plugins/ebook_mobi/__init__.py | 306 +------- pyglossary/plugins/ebook_mobi/writer.py | 308 ++++++++ pyglossary/plugins/edict2/__init__.py | 88 +-- pyglossary/plugins/edict2/reader.py | 89 +++ pyglossary/plugins/edlin/__init__.py | 272 +------- pyglossary/plugins/edlin/reader.py | 131 ++++ pyglossary/plugins/edlin/writer.py | 141 ++++ pyglossary/plugins/gettext_po/__init__.py | 177 +---- pyglossary/plugins/gettext_po/reader.py | 128 ++++ pyglossary/plugins/gettext_po/writer.py | 66 ++ pyglossary/plugins/html_dir/__init__.py | 490 +------------ pyglossary/plugins/html_dir/writer.py | 491 +++++++++++++ pyglossary/plugins/info_plugin/__init__.py | 30 +- pyglossary/plugins/info_plugin/reader.py | 36 + pyglossary/plugins/jmdict/__init__.py | 416 +---------- pyglossary/plugins/jmdict/reader.py | 417 +++++++++++ pyglossary/plugins/jmnedict/__init__.py | 295 +------- pyglossary/plugins/jmnedict/reader.py | 298 ++++++++ pyglossary/plugins/json_plugin/__init__.py | 64 +- pyglossary/plugins/json_plugin/writer.py | 68 ++ pyglossary/plugins/lingoes_ldf/__init__.py | 134 +--- pyglossary/plugins/lingoes_ldf/reader.py | 77 ++ pyglossary/plugins/lingoes_ldf/writer.py | 66 ++ 
.../plugins/makindo_medical/__init__.py | 54 +- pyglossary/plugins/makindo_medical/reader.py | 58 ++ .../plugins/octopus_mdict_new/__init__.py | 220 +----- .../plugins/octopus_mdict_new/reader.py | 221 ++++++ pyglossary/plugins/sql/__init__.py | 138 +--- pyglossary/plugins/sql/writer.py | 140 ++++ .../plugins/stardict_merge_syns/__init__.py | 133 +--- .../plugins/stardict_merge_syns/writer.py | 137 ++++ .../plugins/stardict_textual/__init__.py | 359 +--------- pyglossary/plugins/stardict_textual/reader.py | 212 ++++++ pyglossary/plugins/stardict_textual/writer.py | 162 +++++ pyglossary/plugins/tabfile/__init__.py | 119 +--- pyglossary/plugins/tabfile/reader.py | 49 ++ pyglossary/plugins/tabfile/writer.py | 59 ++ pyglossary/plugins/testformat/__init__.py | 94 +-- pyglossary/plugins/testformat/reader.py | 57 ++ pyglossary/plugins/testformat/writer.py | 43 ++ pyglossary/plugins/wiktextract/__init__.py | 655 +---------------- pyglossary/plugins/wiktextract/reader.py | 656 ++++++++++++++++++ pyglossary/plugins/wordnet/__init__.py | 324 +-------- pyglossary/plugins/wordnet/reader.py | 330 +++++++++ pyglossary/plugins/wordset/__init__.py | 94 +-- pyglossary/plugins/wordset/reader.py | 97 +++ pyglossary/plugins/xdxf/__init__.py | 253 +------ pyglossary/plugins/xdxf/reader.py | 252 +++++++ pyglossary/plugins/xdxf_css/__init__.py | 282 +------- pyglossary/plugins/xdxf_css/reader.py | 284 ++++++++ pyglossary/plugins/xdxf_lax/__init__.py | 246 +------ pyglossary/plugins/xdxf_lax/reader.py | 246 +++++++ pyglossary/plugins/yomichan/__init__.py | 247 +------ pyglossary/plugins/yomichan/writer.py | 249 +++++++ pyglossary/plugins/zimfile/__init__.py | 184 +---- pyglossary/plugins/zimfile/reader.py | 184 +++++ tests/deprecated/glossary_security_test.py | 1 + 99 files changed, 9068 insertions(+), 8559 deletions(-) create mode 100644 pyglossary/plugins/aard2_slob/reader.py create mode 100644 pyglossary/plugins/aard2_slob/tags.py create mode 100644 pyglossary/plugins/aard2_slob/writer.py create mode 100644 pyglossary/plugins/almaany/reader.py create mode 100644 pyglossary/plugins/ayandict_sqlite/reader.py create mode 100644 pyglossary/plugins/ayandict_sqlite/writer.py create mode 100644 pyglossary/plugins/cc_kedict/reader.py create mode 100644 pyglossary/plugins/crawler_dir/reader.py create mode 100644 pyglossary/plugins/crawler_dir/writer.py create mode 100644 pyglossary/plugins/csv_plugin/reader.py create mode 100644 pyglossary/plugins/csv_plugin/writer.py create mode 100644 pyglossary/plugins/dicformids/reader.py create mode 100644 pyglossary/plugins/dicformids/writer.py create mode 100644 pyglossary/plugins/dict_cc/reader.py create mode 100644 pyglossary/plugins/dict_cc_split/reader.py create mode 100644 pyglossary/plugins/dict_org/reader.py create mode 100644 pyglossary/plugins/dict_org/writer.py create mode 100644 pyglossary/plugins/dict_org_source/writer.py create mode 100644 pyglossary/plugins/dictunformat/reader.py create mode 100644 pyglossary/plugins/digitalnk/reader.py create mode 100644 pyglossary/plugins/dikt_json/writer.py create mode 100644 pyglossary/plugins/ebook_epub2/writer.py create mode 100644 pyglossary/plugins/ebook_kobo/writer.py create mode 100644 pyglossary/plugins/ebook_kobo_dictfile/reader.py create mode 100644 pyglossary/plugins/ebook_kobo_dictfile/writer.py create mode 100644 pyglossary/plugins/ebook_mobi/writer.py create mode 100644 pyglossary/plugins/edict2/reader.py create mode 100644 pyglossary/plugins/edlin/reader.py create mode 100644 pyglossary/plugins/edlin/writer.py 
create mode 100644 pyglossary/plugins/gettext_po/reader.py create mode 100644 pyglossary/plugins/gettext_po/writer.py create mode 100644 pyglossary/plugins/html_dir/writer.py create mode 100644 pyglossary/plugins/info_plugin/reader.py create mode 100644 pyglossary/plugins/jmdict/reader.py create mode 100644 pyglossary/plugins/jmnedict/reader.py create mode 100644 pyglossary/plugins/json_plugin/writer.py create mode 100644 pyglossary/plugins/lingoes_ldf/reader.py create mode 100644 pyglossary/plugins/lingoes_ldf/writer.py create mode 100644 pyglossary/plugins/makindo_medical/reader.py create mode 100644 pyglossary/plugins/octopus_mdict_new/reader.py create mode 100644 pyglossary/plugins/sql/writer.py create mode 100644 pyglossary/plugins/stardict_merge_syns/writer.py create mode 100644 pyglossary/plugins/stardict_textual/reader.py create mode 100644 pyglossary/plugins/stardict_textual/writer.py create mode 100644 pyglossary/plugins/tabfile/reader.py create mode 100644 pyglossary/plugins/tabfile/writer.py create mode 100644 pyglossary/plugins/testformat/reader.py create mode 100644 pyglossary/plugins/testformat/writer.py create mode 100644 pyglossary/plugins/wiktextract/reader.py create mode 100644 pyglossary/plugins/wordnet/reader.py create mode 100644 pyglossary/plugins/wordset/reader.py create mode 100644 pyglossary/plugins/xdxf/reader.py create mode 100644 pyglossary/plugins/xdxf_css/reader.py create mode 100644 pyglossary/plugins/xdxf_lax/reader.py create mode 100644 pyglossary/plugins/yomichan/writer.py create mode 100644 pyglossary/plugins/zimfile/reader.py diff --git a/pyglossary/plugins/aard2_slob/__init__.py b/pyglossary/plugins/aard2_slob/__init__.py index 8d75434ff..6e63ead7a 100644 --- a/pyglossary/plugins/aard2_slob/__init__.py +++ b/pyglossary/plugins/aard2_slob/__init__.py @@ -1,19 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import os -import re -import shutil -from os.path import isfile, splitext -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Generator, Iterator - - from pyglossary import slob - from pyglossary.glossary_types import EntryType, GlossaryType - -from pyglossary.core import cacheDir, exc_note, log, pip from pyglossary.option import ( BoolOption, FileSizeOption, @@ -22,6 +9,9 @@ StrOption, ) +from .reader import Reader +from .writer import Writer + __all__ = [ "Reader", "Writer", @@ -92,380 +82,3 @@ " instructions on how to install PyICU.", ), ] - -t_created_at = "created.at" -t_label = "label" -t_created_by = "created.by" -t_copyright = "copyright" -t_license_name = "license.name" -t_license_url = "license.url" -t_uri = "uri" -t_edition = "edition" - -supported_tags = { - t_label, - t_created_at, - t_created_by, - t_copyright, - t_uri, - t_edition, -} - - -class Reader: - depends = { - "icu": "PyICU", # >=1.5 - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - self._re_bword = re.compile( - "(]+?>)", - re.IGNORECASE, - ) - - def close(self) -> None: - if self._slobObj is not None: - self._slobObj.close() - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._slobObj: slob.Slob | None = None - - # TODO: PLR0912 Too many branches (13 > 12) - def open(self, filename: str) -> None: # noqa: PLR0912 - try: - import icu # type: ignore # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install PyICU` to install") - raise - from pyglossary import slob - - self._filename = filename - self._slobObj = slob.open(filename) - tags 
= dict(self._slobObj.tags.items()) - - if t_label in tags: - self._glos.setInfo("name", tags[t_label]) - - if t_created_at in tags: - self._glos.setInfo("creationTime", tags[t_created_at]) - - if t_created_by in tags: - self._glos.setInfo("author", tags[t_created_by]) - - copyrightLines: list[str] = [] - for key in (t_copyright, t_license_name, t_license_url): - try: - value = tags.pop(key) - except KeyError: - continue - copyrightLines.append(value) - if copyrightLines: - self._glos.setInfo("copyright", "\n".join(copyrightLines)) - - if t_uri in tags: - self._glos.setInfo("website", tags[t_uri]) - - if t_edition in tags: - self._glos.setInfo("edition", tags[t_edition]) - - for key, value in tags.items(): - if key in supported_tags: - continue - self._glos.setInfo(f"slob.{key}", value) - - def __len__(self) -> int: - if self._slobObj is None: - log.error("called len() on a reader which is not open") - return 0 - return len(self._slobObj) - - @staticmethod - def _href_sub(m: re.Match) -> str: - st = m.group(0) - if "//" in st: - return st - return st.replace('href="', 'href="bword://').replace( - "href='", - "href='bword://", - ) - - def __iter__(self) -> Iterator[EntryType | None]: - from pyglossary.slob import MIME_HTML, MIME_TEXT - - if self._slobObj is None: - raise RuntimeError("iterating over a reader while it's not open") - - slobObj = self._slobObj - blobSet = set() - - # slob library gives duplicate blobs when iterating over slobObj - # even keeping the last id is not enough, since duplicate blobs - # are not all consecutive. so we have to keep a set of blob IDs - - for blob in slobObj: - id_ = blob.identity - if id_ in blobSet: - yield None # update progressbar - continue - blobSet.add(id_) - - # blob.key is str, blob.content is bytes - word = blob.key - - ctype = blob.content_type.split(";")[0] - if ctype not in {MIME_HTML, MIME_TEXT}: - log.debug(f"unknown {blob.content_type=} in {word=}") - word = word.removeprefix("~/") - yield self._glos.newDataEntry(word, blob.content) - continue - defiFormat = "" - if ctype == MIME_HTML: - defiFormat = "h" - elif ctype == MIME_TEXT: - defiFormat = "m" - - defi = blob.content.decode("utf-8") - defi = self._re_bword.sub(self._href_sub, defi) - yield self._glos.newEntry(word, defi, defiFormat=defiFormat) - - -class Writer: - depends = { - "icu": "PyICU", - } - - _compression: str = "zlib" - _content_type: str = "" - _file_size_approx: int = 0 - _file_size_approx_check_num_entries = 100 - _separate_alternates: bool = False - _word_title: bool = False - _version_info: bool = False - - _audio_goldendict: bool = False - - resourceMimeTypes = { - "png": "image/png", - "jpeg": "image/jpeg", - "jpg": "image/jpeg", - "gif": "image/gif", - "svg": "image/svg+xml", - "webp": "image/webp", - "tiff": "image/tiff", - "tif": "image/tiff", - "bmp": "image/bmp", - "css": "text/css", - "js": "application/javascript", - "json": "application/json", - "woff": "application/font-woff", - "woff2": "application/font-woff2", - "ttf": "application/x-font-ttf", - "otf": "application/x-font-opentype", - "mp3": "audio/mpeg", - "ogg": "audio/ogg", - "spx": "audio/x-speex", - "wav": "audio/wav", - "ini": "text/plain", - # "application/octet-stream+xapian", - "eot": "application/vnd.ms-fontobject", - "pdf": "application/pdf", - "mp4": "video/mp4", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._resPrefix = "" - self._slobWriter: slob.Writer | None = None - - @staticmethod - def _slobObserver( - event: slob.WriterEvent, 
# noqa: F401, F821 - ) -> None: - log.debug(f"slob: {event.name}{': ' + event.data if event.data else ''}") - - def _open(self, filepath: str, namePostfix: str) -> slob.Writer: - from pyglossary import slob - - if isfile(filepath): - shutil.move(filepath, f"{filepath}.bak") - log.warning(f"renamed existing {filepath!r} to {filepath + '.bak'!r}") - self._slobWriter = slobWriter = slob.Writer( - filepath, - observer=self._slobObserver, - workdir=cacheDir, - compression=self._compression, - version_info=self._version_info, - ) - - # "label" tag is a dictionary name shown in UI - slobWriter.tag(t_label, self._glos.getInfo("name") + namePostfix) - - createdAt = self._glos.getInfo("creationTime") - if createdAt is not None: - slobWriter.tag(t_created_at, createdAt) - createdBy = self._glos.getInfo("author") - if createdBy is not None: - slobWriter.tag(t_created_by, createdBy) - - filename = os.path.basename(filepath) - dic_uri = re.sub(r"[^A-Za-z0-9_-]+", "_", filename) - # "uri" tag is not web url, it's a part of gloss addressing ID: uri + article ID - # setting the tag allows bookmark & history migration, if dict file is updated - # we use source filename as "uri", since it is stable (most likely) - slobWriter.tag(t_uri, dic_uri) - - return slobWriter - - def open(self, filename: str) -> None: - try: - import icu # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install PyICU` to install") - raise - if isfile(filename): - raise OSError(f"File '{filename}' already exists") - namePostfix = "" - if self._file_size_approx > 0: - namePostfix = " (part 1)" - self._open(filename, namePostfix) - self._filename = filename - - def finish(self) -> None: - from time import perf_counter - - self._filename = "" - if self._slobWriter is None: - return - log.info("Finalizing slob file...") - t0 = perf_counter() - self._slobWriter.finalize() - log.info(f"Finalizing slob file took {perf_counter() - t0:.1f} seconds") - self._slobWriter = None - - def addDataEntry(self, entry: EntryType) -> None: - slobWriter = self._slobWriter - if slobWriter is None: - raise ValueError("slobWriter is None") - rel_path = entry.s_word - _, ext = splitext(rel_path) - ext = ext.lstrip(os.path.extsep).lower() - content_type = self.resourceMimeTypes.get(ext) - if not content_type: - log.error(f"Aard2 slob: unknown content type for {rel_path!r}") - return - content = entry.data - key = self._resPrefix + rel_path - try: - key.encode(slobWriter.encoding) - except UnicodeEncodeError: - log.error(f"Failed to add, broken unicode in key: {key!a}") - return - slobWriter.add(content, key, content_type=content_type) - - def addEntry(self, entry: EntryType) -> None: - words = entry.l_word - b_defi = entry.defi.encode("utf-8") - ctype = self._content_type - writer = self._slobWriter - if writer is None: - raise ValueError("slobWriter is None") - - entry.detectDefiFormat() - defiFormat = entry.defiFormat - - if self._word_title and defiFormat in {"h", "m"}: - if defiFormat == "m": - defiFormat = "h" - title = self._glos.wordTitleStr( - words[0], - ) - b_defi = title.encode("utf-8") + b_defi - - if defiFormat == "h": - b_defi = b_defi.replace(b'"bword://', b'"') - b_defi = b_defi.replace(b"'bword://", b"'") - - if not self._audio_goldendict: - b_defi = b_defi.replace( - b"""href="sound://""", - b'''onclick="new Audio(this.href).play(); return false;" href="''', - ) - b_defi = b_defi.replace( - b"""href='sound://""", - b"""onclick="new Audio(this.href).play(); return false;" href='""", - ) - b_defi = b_defi.replace(b""" 
Generator[None, EntryType, None]: - slobWriter = self._slobWriter - if slobWriter is None: - raise ValueError("slobWriter is None") - file_size_approx = int(self._file_size_approx * 0.95) - entryCount = 0 - sumBlobSize = 0 - fileIndex = 0 - filenameNoExt, _ = splitext(self._filename) - while True: - entry = yield - if entry is None: - break - - if entry.isData(): - self.addDataEntry(entry) - else: - self.addEntry(entry) - - if file_size_approx <= 0: - continue - - # handle file_size_approx - check_every = self._file_size_approx_check_num_entries - entryCount += 1 - if entryCount % check_every == 0: - sumBlobSize = slobWriter.size_data() - if sumBlobSize >= file_size_approx: - slobWriter.finalize() - fileIndex += 1 - slobWriter = self._open( - f"{filenameNoExt}.{fileIndex}.slob", - f" (part {fileIndex + 1})", - ) - sumBlobSize = 0 - entryCount = 0 diff --git a/pyglossary/plugins/aard2_slob/reader.py b/pyglossary/plugins/aard2_slob/reader.py new file mode 100644 index 000000000..c80fdffb8 --- /dev/null +++ b/pyglossary/plugins/aard2_slob/reader.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + + from pyglossary import slob + from pyglossary.glossary_types import EntryType, GlossaryType + +from pyglossary.core import exc_note, log, pip +from pyglossary.plugins.aard2_slob.tags import ( + supported_tags, + t_copyright, + t_created_at, + t_created_by, + t_edition, + t_label, + t_license_name, + t_license_url, + t_uri, +) + + +class Reader: + depends = { + "icu": "PyICU", # >=1.5 + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + self._re_bword = re.compile( + "(]+?>)", + re.IGNORECASE, + ) + + def close(self) -> None: + if self._slobObj is not None: + self._slobObj.close() + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._slobObj: slob.Slob | None = None + + # TODO: PLR0912 Too many branches (13 > 12) + def open(self, filename: str) -> None: # noqa: PLR0912 + try: + import icu # type: ignore # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install PyICU` to install") + raise + from pyglossary import slob + + self._filename = filename + self._slobObj = slob.open(filename) + tags = dict(self._slobObj.tags.items()) + + if t_label in tags: + self._glos.setInfo("name", tags[t_label]) + + if t_created_at in tags: + self._glos.setInfo("creationTime", tags[t_created_at]) + + if t_created_by in tags: + self._glos.setInfo("author", tags[t_created_by]) + + copyrightLines: list[str] = [] + for key in (t_copyright, t_license_name, t_license_url): + try: + value = tags.pop(key) + except KeyError: + continue + copyrightLines.append(value) + if copyrightLines: + self._glos.setInfo("copyright", "\n".join(copyrightLines)) + + if t_uri in tags: + self._glos.setInfo("website", tags[t_uri]) + + if t_edition in tags: + self._glos.setInfo("edition", tags[t_edition]) + + for key, value in tags.items(): + if key in supported_tags: + continue + self._glos.setInfo(f"slob.{key}", value) + + def __len__(self) -> int: + if self._slobObj is None: + log.error("called len() on a reader which is not open") + return 0 + return len(self._slobObj) + + @staticmethod + def _href_sub(m: re.Match) -> str: + st = m.group(0) + if "//" in st: + return st + return st.replace('href="', 'href="bword://').replace( + "href='", + "href='bword://", + ) + + def __iter__(self) -> Iterator[EntryType | None]: + from 
pyglossary.slob import MIME_HTML, MIME_TEXT + + if self._slobObj is None: + raise RuntimeError("iterating over a reader while it's not open") + + slobObj = self._slobObj + blobSet = set() + + # slob library gives duplicate blobs when iterating over slobObj + # even keeping the last id is not enough, since duplicate blobs + # are not all consecutive. so we have to keep a set of blob IDs + + for blob in slobObj: + id_ = blob.identity + if id_ in blobSet: + yield None # update progressbar + continue + blobSet.add(id_) + + # blob.key is str, blob.content is bytes + word = blob.key + + ctype = blob.content_type.split(";")[0] + if ctype not in {MIME_HTML, MIME_TEXT}: + log.debug(f"unknown {blob.content_type=} in {word=}") + word = word.removeprefix("~/") + yield self._glos.newDataEntry(word, blob.content) + continue + defiFormat = "" + if ctype == MIME_HTML: + defiFormat = "h" + elif ctype == MIME_TEXT: + defiFormat = "m" + + defi = blob.content.decode("utf-8") + defi = self._re_bword.sub(self._href_sub, defi) + yield self._glos.newEntry(word, defi, defiFormat=defiFormat) diff --git a/pyglossary/plugins/aard2_slob/tags.py b/pyglossary/plugins/aard2_slob/tags.py new file mode 100644 index 000000000..e4336a02e --- /dev/null +++ b/pyglossary/plugins/aard2_slob/tags.py @@ -0,0 +1,29 @@ +t_created_at = "created.at" +t_label = "label" +t_created_by = "created.by" +t_copyright = "copyright" +t_license_name = "license.name" +t_license_url = "license.url" +t_uri = "uri" +t_edition = "edition" + +supported_tags = { + t_label, + t_created_at, + t_created_by, + t_copyright, + t_uri, + t_edition, +} + +__all__ = [ + "supported_tags", + "t_copyright", + "t_created_at", + "t_created_by", + "t_edition", + "t_label", + "t_license_name", + "t_license_url", + "t_uri", +] diff --git a/pyglossary/plugins/aard2_slob/writer.py b/pyglossary/plugins/aard2_slob/writer.py new file mode 100644 index 000000000..c8519f987 --- /dev/null +++ b/pyglossary/plugins/aard2_slob/writer.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import os +import re +import shutil +from os.path import isfile, splitext +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary import slob + from pyglossary.glossary_types import EntryType, GlossaryType + +from pyglossary.core import cacheDir, exc_note, log, pip +from pyglossary.plugins.aard2_slob.tags import ( + t_created_at, + t_created_by, + t_label, + t_uri, +) + + +class Writer: + depends = { + "icu": "PyICU", + } + + _compression: str = "zlib" + _content_type: str = "" + _file_size_approx: int = 0 + _file_size_approx_check_num_entries = 100 + _separate_alternates: bool = False + _word_title: bool = False + _version_info: bool = False + + _audio_goldendict: bool = False + + resourceMimeTypes = { + "png": "image/png", + "jpeg": "image/jpeg", + "jpg": "image/jpeg", + "gif": "image/gif", + "svg": "image/svg+xml", + "webp": "image/webp", + "tiff": "image/tiff", + "tif": "image/tiff", + "bmp": "image/bmp", + "css": "text/css", + "js": "application/javascript", + "json": "application/json", + "woff": "application/font-woff", + "woff2": "application/font-woff2", + "ttf": "application/x-font-ttf", + "otf": "application/x-font-opentype", + "mp3": "audio/mpeg", + "ogg": "audio/ogg", + "spx": "audio/x-speex", + "wav": "audio/wav", + "ini": "text/plain", + # "application/octet-stream+xapian", + "eot": "application/vnd.ms-fontobject", + "pdf": "application/pdf", + "mp4": "video/mp4", + } + + def __init__(self, 
glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._resPrefix = "" + self._slobWriter: slob.Writer | None = None + + @staticmethod + def _slobObserver( + event: slob.WriterEvent, # noqa: F401, F821 + ) -> None: + log.debug(f"slob: {event.name}{': ' + event.data if event.data else ''}") + + def _open(self, filepath: str, namePostfix: str) -> slob.Writer: + from pyglossary import slob + + if isfile(filepath): + shutil.move(filepath, f"{filepath}.bak") + log.warning(f"renamed existing {filepath!r} to {filepath + '.bak'!r}") + self._slobWriter = slobWriter = slob.Writer( + filepath, + observer=self._slobObserver, + workdir=cacheDir, + compression=self._compression, + version_info=self._version_info, + ) + + # "label" tag is a dictionary name shown in UI + slobWriter.tag(t_label, self._glos.getInfo("name") + namePostfix) + + createdAt = self._glos.getInfo("creationTime") + if createdAt is not None: + slobWriter.tag(t_created_at, createdAt) + createdBy = self._glos.getInfo("author") + if createdBy is not None: + slobWriter.tag(t_created_by, createdBy) + + filename = os.path.basename(filepath) + dic_uri = re.sub(r"[^A-Za-z0-9_-]+", "_", filename) + # "uri" tag is not web url, it's a part of gloss addressing ID: uri + article ID + # setting the tag allows bookmark & history migration, if dict file is updated + # we use source filename as "uri", since it is stable (most likely) + slobWriter.tag(t_uri, dic_uri) + + return slobWriter + + def open(self, filename: str) -> None: + try: + import icu # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install PyICU` to install") + raise + if isfile(filename): + raise OSError(f"File '{filename}' already exists") + namePostfix = "" + if self._file_size_approx > 0: + namePostfix = " (part 1)" + self._open(filename, namePostfix) + self._filename = filename + + def finish(self) -> None: + from time import perf_counter + + self._filename = "" + if self._slobWriter is None: + return + log.info("Finalizing slob file...") + t0 = perf_counter() + self._slobWriter.finalize() + log.info(f"Finalizing slob file took {perf_counter() - t0:.1f} seconds") + self._slobWriter = None + + def addDataEntry(self, entry: EntryType) -> None: + slobWriter = self._slobWriter + if slobWriter is None: + raise ValueError("slobWriter is None") + rel_path = entry.s_word + _, ext = splitext(rel_path) + ext = ext.lstrip(os.path.extsep).lower() + content_type = self.resourceMimeTypes.get(ext) + if not content_type: + log.error(f"Aard2 slob: unknown content type for {rel_path!r}") + return + content = entry.data + key = self._resPrefix + rel_path + try: + key.encode(slobWriter.encoding) + except UnicodeEncodeError: + log.error(f"Failed to add, broken unicode in key: {key!a}") + return + slobWriter.add(content, key, content_type=content_type) + + def addEntry(self, entry: EntryType) -> None: + words = entry.l_word + b_defi = entry.defi.encode("utf-8") + ctype = self._content_type + writer = self._slobWriter + if writer is None: + raise ValueError("slobWriter is None") + + entry.detectDefiFormat() + defiFormat = entry.defiFormat + + if self._word_title and defiFormat in {"h", "m"}: + if defiFormat == "m": + defiFormat = "h" + title = self._glos.wordTitleStr( + words[0], + ) + b_defi = title.encode("utf-8") + b_defi + + if defiFormat == "h": + b_defi = b_defi.replace(b'"bword://', b'"') + b_defi = b_defi.replace(b"'bword://", b"'") + + if not self._audio_goldendict: + b_defi = b_defi.replace( + b"""href="sound://""", + b'''onclick="new 
Audio(this.href).play(); return false;" href="''', + ) + b_defi = b_defi.replace( + b"""href='sound://""", + b"""onclick="new Audio(this.href).play(); return false;" href='""", + ) + b_defi = b_defi.replace(b""" Generator[None, EntryType, None]: + slobWriter = self._slobWriter + if slobWriter is None: + raise ValueError("slobWriter is None") + file_size_approx = int(self._file_size_approx * 0.95) + entryCount = 0 + sumBlobSize = 0 + fileIndex = 0 + filenameNoExt, _ = splitext(self._filename) + while True: + entry = yield + if entry is None: + break + + if entry.isData(): + self.addDataEntry(entry) + else: + self.addEntry(entry) + + if file_size_approx <= 0: + continue + + # handle file_size_approx + check_every = self._file_size_approx_check_num_entries + entryCount += 1 + if entryCount % check_every == 0: + sumBlobSize = slobWriter.size_data() + if sumBlobSize >= file_size_approx: + slobWriter.finalize() + fileIndex += 1 + slobWriter = self._open( + f"{filenameNoExt}.{fileIndex}.slob", + f" (part {fileIndex + 1})", + ) + sumBlobSize = 0 + entryCount = 0 diff --git a/pyglossary/plugins/almaany/__init__.py b/pyglossary/plugins/almaany/__init__.py index 9a49bb167..8838cfd62 100644 --- a/pyglossary/plugins/almaany/__init__.py +++ b/pyglossary/plugins/almaany/__init__.py @@ -1,16 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import html from typing import TYPE_CHECKING if TYPE_CHECKING: - import sqlite3 - from collections.abc import Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option +from .reader import Reader + __all__ = [ "Reader", "description", @@ -40,80 +37,3 @@ "Almaany.com Arabic Dictionary - Google Play", ) optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - self._con = connect(filename) - self._cur = self._con.cursor() - self._glos.setDefaultDefiFormat("h") - - def __len__(self) -> int: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute("select count(*) from WordsTable") - return self._cur.fetchone()[0] - - def __iter__(self) -> Iterator[EntryType]: - if self._cur is None: - raise ValueError("cur is None") - from pyglossary.langs.writing_system import getWritingSystemFromText - - alternateDict: dict[str, list[str]] = {} - self._cur.execute("select wordkey, searchwordkey from Keys") - for row in self._cur.fetchall(): - if row[0] in alternateDict: - alternateDict[row[0]].append(row[1]) - else: - alternateDict[row[0]] = [row[1]] - - self._cur.execute( - "select word, searchword, root, meaning from WordsTable order by id", - ) - # FIXME: iteration over self._cur stops after one entry - # and self._cur.fetchone() returns None - # for row in self._cur: - for row in self._cur.fetchall(): - word = row[0] - searchword = row[1] - root = row[2] - meaning = row[3] - definition = meaning - definition = definition.replace("|", "
") - - if root: - definition += ( - f'
<br/>Root: <a href="bword://{html.escape(root)}">{root}</a>' - - ws = getWritingSystemFromText(meaning) - if ws and ws.direction == "rtl": - definition = f'<div dir="rtl">{definition}</div>
' - - words = [word, searchword] - if word in alternateDict: - words += alternateDict[word] - yield self._glos.newEntry( - words, - definition, - defiFormat="h", - ) - - def close(self) -> None: - if self._cur: - self._cur.close() - if self._con: - self._con.close() - self._clear() diff --git a/pyglossary/plugins/almaany/reader.py b/pyglossary/plugins/almaany/reader.py new file mode 100644 index 000000000..3447c1010 --- /dev/null +++ b/pyglossary/plugins/almaany/reader.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import html +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import sqlite3 + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + self._con = connect(filename) + self._cur = self._con.cursor() + self._glos.setDefaultDefiFormat("h") + + def __len__(self) -> int: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute("select count(*) from WordsTable") + return self._cur.fetchone()[0] + + def __iter__(self) -> Iterator[EntryType]: + if self._cur is None: + raise ValueError("cur is None") + from pyglossary.langs.writing_system import getWritingSystemFromText + + alternateDict: dict[str, list[str]] = {} + self._cur.execute("select wordkey, searchwordkey from Keys") + for row in self._cur.fetchall(): + if row[0] in alternateDict: + alternateDict[row[0]].append(row[1]) + else: + alternateDict[row[0]] = [row[1]] + + self._cur.execute( + "select word, searchword, root, meaning from WordsTable order by id", + ) + # FIXME: iteration over self._cur stops after one entry + # and self._cur.fetchone() returns None + # for row in self._cur: + for row in self._cur.fetchall(): + word = row[0] + searchword = row[1] + root = row[2] + meaning = row[3] + definition = meaning + definition = definition.replace("|", "
") + + if root: + definition += ( + f'
<br/>Root: <a href="bword://{html.escape(root)}">{root}</a>' + + ws = getWritingSystemFromText(meaning) + if ws and ws.direction == "rtl": + definition = f'<div dir="rtl">{definition}</div>
' + + words = [word, searchword] + if word in alternateDict: + words += alternateDict[word] + yield self._glos.newEntry( + words, + definition, + defiFormat="h", + ) + + def close(self) -> None: + if self._cur: + self._cur.close() + if self._con: + self._con.close() + self._clear() diff --git a/pyglossary/plugins/ayandict_sqlite/__init__.py b/pyglossary/plugins/ayandict_sqlite/__init__.py index 5ac40b37b..a86e83029 100644 --- a/pyglossary/plugins/ayandict_sqlite/__init__.py +++ b/pyglossary/plugins/ayandict_sqlite/__init__.py @@ -1,20 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from typing import ( - TYPE_CHECKING, -) - -if TYPE_CHECKING: - import sqlite3 - from collections.abc import Generator, Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.xdxf.transform import XdxfTransformer - -from pyglossary.core import log from pyglossary.option import BoolOption, Option +from .reader import Reader +from .writer import Writer + __all__ = [ "Reader", "Writer", @@ -49,194 +40,3 @@ comment="Create fuzzy search data", ), } - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - self._con = connect(filename) - self._cur = self._con.cursor() - self._glos.setDefaultDefiFormat("h") - - self._cur.execute("SELECT key, value FROM meta;") - for row in self._cur.fetchall(): - if row[0] == "hash": - continue - self._glos.setInfo(row[0], row[1]) - - def __len__(self) -> int: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute("select count(id) from entry") - return self._cur.fetchone()[0] - - def __iter__(self) -> Iterator[EntryType]: - from json import loads - - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute( - "SELECT entry.term, entry.article, " - "json_group_array(alt.term)" - "FROM entry LEFT JOIN alt ON entry.id=alt.id " - "GROUP BY entry.id;", - ) - for row in self._cur.fetchall(): - terms = [row[0]] + [alt for alt in loads(row[2]) if alt] - article = row[1] - yield self._glos.newEntry(terms, article, defiFormat="h") - - def close(self) -> None: - if self._cur: - self._cur.close() - if self._con: - self._con.close() - self._clear() - - -class Writer: - _fuzzy: int = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - self._xdxfTr: XdxfTransformer | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - con = self._con = connect(filename) - self._cur = self._con.cursor() - - for query in ( - "CREATE TABLE meta ('key' TEXT PRIMARY KEY NOT NULL, 'value' TEXT);", - ( - "CREATE TABLE entry ('id' INTEGER PRIMARY KEY NOT NULL, " - "'term' TEXT, 'article' TEXT);" - ), - "CREATE TABLE alt ('id' INTEGER NOT NULL, 'term' TEXT);", - "CREATE INDEX idx_meta ON meta(key);", - "CREATE INDEX idx_entry_term ON entry(term COLLATE NOCASE);", - "CREATE INDEX idx_alt_id ON alt(id);", - "CREATE INDEX idx_alt_term ON alt(term COLLATE NOCASE);", - ): - try: - con.execute(query) - except Exception as e: # noqa: PERF203 - log.error(f"query: {query}") - raise e - - for key, value in 
self._glos.iterInfo(): - con.execute( - "INSERT INTO meta (key, value) VALUES (?, ?);", - (key, value), - ) - - if self._fuzzy: - con.execute( - "CREATE TABLE fuzzy3 ('sub' TEXT NOT NULL, " - "'term' TEXT NOT NULL, " - "id INTEGER NOT NULL);", - ) - con.execute( - "CREATE INDEX idx_fuzzy3_sub ON fuzzy3(sub COLLATE NOCASE);", - ) - - con.commit() - - def finish(self) -> None: - if self._con is None or self._cur is None: - return - - self._con.commit() - self._con.close() - self._con = None - self._cur = None - - def xdxf_setup(self) -> None: - from pyglossary.xdxf.transform import XdxfTransformer - - # if self._xsl: - # self._xdxfTr = XslXdxfTransformer(encoding="utf-8") - # return - self._xdxfTr = XdxfTransformer(encoding="utf-8") - - def xdxf_transform(self, text: str) -> str: - if self._xdxfTr is None: - self.xdxf_setup() - return self._xdxfTr.transformByInnerString(text) # type: ignore - - def write(self) -> Generator[None, EntryType, None]: - import hashlib - - cur = self._cur - if cur is None: - raise ValueError("cur is None") - hash_ = hashlib.md5() - while True: - entry = yield - if entry is None: - break - if entry.isData(): - # can save it with entry.save(directory) - continue - defi = entry.defi - entry.detectDefiFormat() - if entry.defiFormat == "m": - if "\n" in defi: - defi = f"
<pre>{defi}</pre>
" - elif entry.defiFormat == "x": - defi = self.xdxf_transform(defi) - - cur.execute( - "INSERT INTO entry(term, article) VALUES (?, ?);", - (entry.l_word[0], defi), - ) - id_ = cur.lastrowid - if id_ is None: - raise ValueError("lastrowid is None") - for alt in entry.l_word[1:]: - cur.execute( - "INSERT INTO alt(id, term) VALUES (?, ?);", - (id_, alt), - ) - hash_.update(entry.s_word.encode("utf-8")) - if self._fuzzy: - self.addFuzzy(id_, entry.l_word) - - cur.execute( - "INSERT INTO meta (key, value) VALUES (?, ?);", - ("hash", hash_.hexdigest()), - ) - - def addFuzzy(self, id_: int, terms: list[str]) -> None: - cur = self._cur - if cur is None: - raise ValueError("cur is None") - for term in terms: - subs: set[str] = set() - for word in term.split(" "): - eword = "\n" + word - subs.update(eword[i : i + 3] for i in range(len(eword) - 2)) - for sub in subs: - cur.execute( - "INSERT INTO fuzzy3(sub, term, id) VALUES (?, ?, ?);", - (sub, term, id_), - ) diff --git a/pyglossary/plugins/ayandict_sqlite/reader.py b/pyglossary/plugins/ayandict_sqlite/reader.py new file mode 100644 index 000000000..b1ed0b6eb --- /dev/null +++ b/pyglossary/plugins/ayandict_sqlite/reader.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, +) + +if TYPE_CHECKING: + import sqlite3 + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + self._con = connect(filename) + self._cur = self._con.cursor() + self._glos.setDefaultDefiFormat("h") + + self._cur.execute("SELECT key, value FROM meta;") + for row in self._cur.fetchall(): + if row[0] == "hash": + continue + self._glos.setInfo(row[0], row[1]) + + def __len__(self) -> int: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute("select count(id) from entry") + return self._cur.fetchone()[0] + + def __iter__(self) -> Iterator[EntryType]: + from json import loads + + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute( + "SELECT entry.term, entry.article, " + "json_group_array(alt.term)" + "FROM entry LEFT JOIN alt ON entry.id=alt.id " + "GROUP BY entry.id;", + ) + for row in self._cur.fetchall(): + terms = [row[0]] + [alt for alt in loads(row[2]) if alt] + article = row[1] + yield self._glos.newEntry(terms, article, defiFormat="h") + + def close(self) -> None: + if self._cur: + self._cur.close() + if self._con: + self._con.close() + self._clear() diff --git a/pyglossary/plugins/ayandict_sqlite/writer.py b/pyglossary/plugins/ayandict_sqlite/writer.py new file mode 100644 index 000000000..810631c71 --- /dev/null +++ b/pyglossary/plugins/ayandict_sqlite/writer.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, +) + +if TYPE_CHECKING: + import sqlite3 + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.xdxf.transform import XdxfTransformer + +from pyglossary.core import log + + +class Writer: + _fuzzy: int = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> 
None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + self._xdxfTr: XdxfTransformer | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + con = self._con = connect(filename) + self._cur = self._con.cursor() + + for query in ( + "CREATE TABLE meta ('key' TEXT PRIMARY KEY NOT NULL, 'value' TEXT);", + ( + "CREATE TABLE entry ('id' INTEGER PRIMARY KEY NOT NULL, " + "'term' TEXT, 'article' TEXT);" + ), + "CREATE TABLE alt ('id' INTEGER NOT NULL, 'term' TEXT);", + "CREATE INDEX idx_meta ON meta(key);", + "CREATE INDEX idx_entry_term ON entry(term COLLATE NOCASE);", + "CREATE INDEX idx_alt_id ON alt(id);", + "CREATE INDEX idx_alt_term ON alt(term COLLATE NOCASE);", + ): + try: + con.execute(query) + except Exception as e: # noqa: PERF203 + log.error(f"query: {query}") + raise e + + for key, value in self._glos.iterInfo(): + con.execute( + "INSERT INTO meta (key, value) VALUES (?, ?);", + (key, value), + ) + + if self._fuzzy: + con.execute( + "CREATE TABLE fuzzy3 ('sub' TEXT NOT NULL, " + "'term' TEXT NOT NULL, " + "id INTEGER NOT NULL);", + ) + con.execute( + "CREATE INDEX idx_fuzzy3_sub ON fuzzy3(sub COLLATE NOCASE);", + ) + + con.commit() + + def finish(self) -> None: + if self._con is None or self._cur is None: + return + + self._con.commit() + self._con.close() + self._con = None + self._cur = None + + def xdxf_setup(self) -> None: + from pyglossary.xdxf.transform import XdxfTransformer + + # if self._xsl: + # self._xdxfTr = XslXdxfTransformer(encoding="utf-8") + # return + self._xdxfTr = XdxfTransformer(encoding="utf-8") + + def xdxf_transform(self, text: str) -> str: + if self._xdxfTr is None: + self.xdxf_setup() + return self._xdxfTr.transformByInnerString(text) # type: ignore + + def write(self) -> Generator[None, EntryType, None]: + import hashlib + + cur = self._cur + if cur is None: + raise ValueError("cur is None") + hash_ = hashlib.md5() + while True: + entry = yield + if entry is None: + break + if entry.isData(): + # can save it with entry.save(directory) + continue + defi = entry.defi + entry.detectDefiFormat() + if entry.defiFormat == "m": + if "\n" in defi: + defi = f"
<pre>{defi}</pre>
" + elif entry.defiFormat == "x": + defi = self.xdxf_transform(defi) + + cur.execute( + "INSERT INTO entry(term, article) VALUES (?, ?);", + (entry.l_word[0], defi), + ) + id_ = cur.lastrowid + if id_ is None: + raise ValueError("lastrowid is None") + for alt in entry.l_word[1:]: + cur.execute( + "INSERT INTO alt(id, term) VALUES (?, ?);", + (id_, alt), + ) + hash_.update(entry.s_word.encode("utf-8")) + if self._fuzzy: + self.addFuzzy(id_, entry.l_word) + + cur.execute( + "INSERT INTO meta (key, value) VALUES (?, ?);", + ("hash", hash_.hexdigest()), + ) + + def addFuzzy(self, id_: int, terms: list[str]) -> None: + cur = self._cur + if cur is None: + raise ValueError("cur is None") + for term in terms: + subs: set[str] = set() + for word in term.split(" "): + eword = "\n" + word + subs.update(eword[i : i + 3] for i in range(len(eword) - 2)) + for sub in subs: + cur.execute( + "INSERT INTO fuzzy3(sub, term, id) VALUES (?, ?, ?);", + (sub, term, id_), + ) diff --git a/pyglossary/plugins/cc_kedict/__init__.py b/pyglossary/plugins/cc_kedict/__init__.py index 772c2ff6b..5289633ef 100644 --- a/pyglossary/plugins/cc_kedict/__init__.py +++ b/pyglossary/plugins/cc_kedict/__init__.py @@ -2,20 +2,12 @@ # mypy: ignore-errors from __future__ import annotations -from io import BytesIO -from os.path import isdir, join -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING if TYPE_CHECKING: - from collections.abc import Callable, Iterator - - import lxml - - from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option -from pyglossary.core import exc_note, log, pip -from pyglossary.text_reader import TextGlossaryReader +from .reader import Reader __all__ = [ "Reader", @@ -46,295 +38,3 @@ "@mhagiwara/cc-kedict", ) optionsProp: dict[str, Option] = {} - - -class YamlReader(TextGlossaryReader): - tagStyle = ( - "color:white;" - "background:green;" - "padding-left:3px;" - "padding-right:3px;" - "border-radius:0.5ex;" - # 0.5ex ~= 0.3em, but "ex" is recommended - ) - - def __init__( # noqa: PLR0913 - self, - glos: GlossaryType, - spellKey: str = "", - posKey: str = "", - synsKey: str = "", - tagsKey: str = "", - ) -> None: - TextGlossaryReader.__init__(self, glos) - self._spellKey = spellKey - self._posKey = posKey - self._synsKey = synsKey - self._tagsKey = tagsKey - - self._posMapping = { - "n": "noun", - "v": "verb", - "a": "adjective", - "pron": "pronoun", - "propn": "proper noun", - "intj": "interjection", - "det": "determiner", - "part": "particle", - "adv": "adverb", - "num": "number", - "abbrev": "abbreviation", - "suf": "suffix", - "pref": "prefix", - } - - @classmethod - def isInfoWord(cls, _word: str) -> bool: - return False - - @classmethod - def fixInfoWord(cls, _word: str) -> str: - return "" - - @staticmethod - def _makeList( - hf: lxml.etree.htmlfile, - input_objects: list[Any], - processor: Callable, - single_prefix: str | None = None, - skip_single: bool = True, - ) -> None: - """Wrap elements into
    if more than one element.""" - if not input_objects: - return - - if skip_single and len(input_objects) == 1: - # if single_prefix is None: - # single_prefix = ET.Element("br") - if single_prefix: - hf.write(single_prefix) - processor(hf, input_objects[0], 1) - return - - with hf.element("ol"): - for el in input_objects: - with hf.element("li"): - processor(hf, el, len(input_objects)) - - def _processExample( # noqa: PLR6301 - self, - hf: lxml.etree.htmlfile, - exampleDict: dict, - _count: int, - ) -> None: - from lxml import etree as ET - - if not exampleDict.get("example"): - log.error(f"invalid example: {exampleDict}") - return - - hf.write(exampleDict["example"]) - - transliteration = exampleDict.get("transliteration") - if transliteration: - hf.write(ET.Element("br")) - with hf.element("font", color="green"): - hf.write(f"{transliteration}") - - translation = exampleDict.get("translation") - if translation: - hf.write(ET.Element("br")) - with hf.element("i"): - hf.write(f"{translation}") - - def _processDef( - self, - hf: lxml.etree.htmlfile, - defDict: dict, - count: int, - ) -> None: - from lxml import etree as ET - - text = defDict.get("def", "") - if text: - hf.write(text) - - examples = defDict.get("examples") - if examples: - if text: - if count == 1: - hf.write(ET.Element("br")) - hf.write(ET.Element("br")) - with hf.element("i"): - hf.write("Examples:") - self._makeList( - hf, - examples, - self._processExample, - skip_single=False, - ) - - def _processNote( # noqa: PLR6301 - self, - hf: lxml.etree.htmlfile, - note: str, - _count: int, - ) -> None: - hf.write(note) - - def _processEntry( - self, - hf: lxml.etree.htmlfile, - edict: dict, - ) -> None: - from lxml import etree as ET - - if self._spellKey and self._spellKey in edict: - spelling = edict[self._spellKey] - if not isinstance(spelling, str): - log.error(f"{spelling=}, {type(spelling)=}, {edict=}") - # https://github.com/mhagiwara/cc-kedict/pull/1 - spelling = "on" if spelling is True else "" - if spelling: - with hf.element("font", color="green"): - hf.write(spelling) - hf.write(ET.Element("br")) - - if self._posKey and self._posKey in edict: - pos = edict[self._posKey] - pos = self._posMapping.get(pos, pos) - with hf.element("i"): - hf.write(pos.capitalize()) - hf.write(ET.Element("br")) - - if self._tagsKey and self._tagsKey in edict: - tags = edict[self._tagsKey] - for i, tag in enumerate(tags): - if i > 0: - hf.write(" ") - with hf.element("span", style=self.tagStyle): - hf.write(tag) - hf.write(ET.Element("br")) - - defs = edict.get("defs") - if defs: - self._makeList( - hf, - defs, - self._processDef, - ) - - if self._synsKey and self._synsKey in edict: - hf.write("Synonyms: ") - for i, word in enumerate(edict[self._synsKey]): - if i > 0: - with hf.element("big"): - hf.write(" | ") # NESTED: 5 - with hf.element("a", href=f"bword://{word}"): - hf.write(word) - hf.write(ET.Element("br")) - - notes = edict.get("notes") - if notes: - hf.write(ET.Element("br")) - hf.write("Notes:") - self._makeList( - hf, - notes, - self._processNote, - skip_single=False, - ) - - def _createEntry( - self, - yamlBlock: str, - ) -> tuple[str, str, None] | None: - from lxml import etree as ET - from yaml import load - - try: - from yaml import CLoader as Loader - except ImportError: - from yaml import Loader - - edict = load(yamlBlock, Loader=Loader) - word = edict.get("word") - if not word: - log.error(f"no word in {edict}") - return None - - f = BytesIO() - - with ET.htmlfile(f, encoding="utf-8") as hf: - with hf.element("div"): - 
self._processEntry(hf, edict) - - defi = f.getvalue().decode("utf-8") - return word, defi, None - - def nextBlock(self) -> EntryType: - if not self._file: - raise StopIteration - lines: list[str] = [] - while True: - line = self.readline() - if not line: - break - line = line.rstrip("\n\r") - if not line: - continue - if line.startswith("- "): - line = " " + line[1:] - if lines: - self._bufferLine = line - return self._createEntry("\n".join(lines)) - - lines.append(line) - - if lines: - return self._createEntry("\n".join(lines)) - - raise StopIteration - - -class Reader: - depends = { - "yaml": "PyYAML", - "lxml": "lxml", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._yaml = YamlReader( - glos, - spellKey="romaja", - posKey="pos", - synsKey="syns", - tagsKey="tags", - ) - - def __len__(self) -> int: - return 0 - - def open(self, filename: str) -> None: - try: - from lxml import etree as ET # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install lxml` to install") - raise - - if isdir(filename): - filename = join(filename, "kedict.yml") - self._filename = filename - - self._glos.sourceLangName = "Korean" - self._glos.targetLangName = "English" - - self._glos.setDefaultDefiFormat("h") - self._yaml.open(filename) - - def close(self) -> None: - self._yaml.close() - - def __iter__(self) -> Iterator[EntryType]: - yield from self._yaml diff --git a/pyglossary/plugins/cc_kedict/reader.py b/pyglossary/plugins/cc_kedict/reader.py new file mode 100644 index 000000000..1a9efcb4f --- /dev/null +++ b/pyglossary/plugins/cc_kedict/reader.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- +# mypy: ignore-errors +from __future__ import annotations + +from io import BytesIO +from os.path import isdir, join +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + import lxml + + from pyglossary.glossary_types import EntryType, GlossaryType + +from pyglossary.core import exc_note, log, pip +from pyglossary.text_reader import TextGlossaryReader + + +class YamlReader(TextGlossaryReader): + tagStyle = ( + "color:white;" + "background:green;" + "padding-left:3px;" + "padding-right:3px;" + "border-radius:0.5ex;" + # 0.5ex ~= 0.3em, but "ex" is recommended + ) + + def __init__( # noqa: PLR0913 + self, + glos: GlossaryType, + spellKey: str = "", + posKey: str = "", + synsKey: str = "", + tagsKey: str = "", + ) -> None: + TextGlossaryReader.__init__(self, glos) + self._spellKey = spellKey + self._posKey = posKey + self._synsKey = synsKey + self._tagsKey = tagsKey + + self._posMapping = { + "n": "noun", + "v": "verb", + "a": "adjective", + "pron": "pronoun", + "propn": "proper noun", + "intj": "interjection", + "det": "determiner", + "part": "particle", + "adv": "adverb", + "num": "number", + "abbrev": "abbreviation", + "suf": "suffix", + "pref": "prefix", + } + + @classmethod + def isInfoWord(cls, _word: str) -> bool: + return False + + @classmethod + def fixInfoWord(cls, _word: str) -> str: + return "" + + @staticmethod + def _makeList( + hf: lxml.etree.htmlfile, + input_objects: list[Any], + processor: Callable, + single_prefix: str | None = None, + skip_single: bool = True, + ) -> None: + """Wrap elements into
      if more than one element.""" + if not input_objects: + return + + if skip_single and len(input_objects) == 1: + # if single_prefix is None: + # single_prefix = ET.Element("br") + if single_prefix: + hf.write(single_prefix) + processor(hf, input_objects[0], 1) + return + + with hf.element("ol"): + for el in input_objects: + with hf.element("li"): + processor(hf, el, len(input_objects)) + + def _processExample( # noqa: PLR6301 + self, + hf: lxml.etree.htmlfile, + exampleDict: dict, + _count: int, + ) -> None: + from lxml import etree as ET + + if not exampleDict.get("example"): + log.error(f"invalid example: {exampleDict}") + return + + hf.write(exampleDict["example"]) + + transliteration = exampleDict.get("transliteration") + if transliteration: + hf.write(ET.Element("br")) + with hf.element("font", color="green"): + hf.write(f"{transliteration}") + + translation = exampleDict.get("translation") + if translation: + hf.write(ET.Element("br")) + with hf.element("i"): + hf.write(f"{translation}") + + def _processDef( + self, + hf: lxml.etree.htmlfile, + defDict: dict, + count: int, + ) -> None: + from lxml import etree as ET + + text = defDict.get("def", "") + if text: + hf.write(text) + + examples = defDict.get("examples") + if examples: + if text: + if count == 1: + hf.write(ET.Element("br")) + hf.write(ET.Element("br")) + with hf.element("i"): + hf.write("Examples:") + self._makeList( + hf, + examples, + self._processExample, + skip_single=False, + ) + + def _processNote( # noqa: PLR6301 + self, + hf: lxml.etree.htmlfile, + note: str, + _count: int, + ) -> None: + hf.write(note) + + def _processEntry( + self, + hf: lxml.etree.htmlfile, + edict: dict, + ) -> None: + from lxml import etree as ET + + if self._spellKey and self._spellKey in edict: + spelling = edict[self._spellKey] + if not isinstance(spelling, str): + log.error(f"{spelling=}, {type(spelling)=}, {edict=}") + # https://github.com/mhagiwara/cc-kedict/pull/1 + spelling = "on" if spelling is True else "" + if spelling: + with hf.element("font", color="green"): + hf.write(spelling) + hf.write(ET.Element("br")) + + if self._posKey and self._posKey in edict: + pos = edict[self._posKey] + pos = self._posMapping.get(pos, pos) + with hf.element("i"): + hf.write(pos.capitalize()) + hf.write(ET.Element("br")) + + if self._tagsKey and self._tagsKey in edict: + tags = edict[self._tagsKey] + for i, tag in enumerate(tags): + if i > 0: + hf.write(" ") + with hf.element("span", style=self.tagStyle): + hf.write(tag) + hf.write(ET.Element("br")) + + defs = edict.get("defs") + if defs: + self._makeList( + hf, + defs, + self._processDef, + ) + + if self._synsKey and self._synsKey in edict: + hf.write("Synonyms: ") + for i, word in enumerate(edict[self._synsKey]): + if i > 0: + with hf.element("big"): + hf.write(" | ") # NESTED: 5 + with hf.element("a", href=f"bword://{word}"): + hf.write(word) + hf.write(ET.Element("br")) + + notes = edict.get("notes") + if notes: + hf.write(ET.Element("br")) + hf.write("Notes:") + self._makeList( + hf, + notes, + self._processNote, + skip_single=False, + ) + + def _createEntry( + self, + yamlBlock: str, + ) -> tuple[str, str, None] | None: + from lxml import etree as ET + from yaml import load + + try: + from yaml import CLoader as Loader + except ImportError: + from yaml import Loader + + edict = load(yamlBlock, Loader=Loader) + word = edict.get("word") + if not word: + log.error(f"no word in {edict}") + return None + + f = BytesIO() + + with ET.htmlfile(f, encoding="utf-8") as hf: + with hf.element("div"): 
+ self._processEntry(hf, edict) + + defi = f.getvalue().decode("utf-8") + return word, defi, None + + def nextBlock(self) -> EntryType: + if not self._file: + raise StopIteration + lines: list[str] = [] + while True: + line = self.readline() + if not line: + break + line = line.rstrip("\n\r") + if not line: + continue + if line.startswith("- "): + line = " " + line[1:] + if lines: + self._bufferLine = line + return self._createEntry("\n".join(lines)) + + lines.append(line) + + if lines: + return self._createEntry("\n".join(lines)) + + raise StopIteration + + +class Reader: + depends = { + "yaml": "PyYAML", + "lxml": "lxml", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._yaml = YamlReader( + glos, + spellKey="romaja", + posKey="pos", + synsKey="syns", + tagsKey="tags", + ) + + def __len__(self) -> int: + return 0 + + def open(self, filename: str) -> None: + try: + from lxml import etree as ET # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install lxml` to install") + raise + + if isdir(filename): + filename = join(filename, "kedict.yml") + self._filename = filename + + self._glos.sourceLangName = "Korean" + self._glos.targetLangName = "English" + + self._glos.setDefaultDefiFormat("h") + self._yaml.open(filename) + + def close(self) -> None: + self._yaml.close() + + def __iter__(self) -> Iterator[EntryType]: + yield from self._yaml diff --git a/pyglossary/plugins/crawler_dir/__init__.py b/pyglossary/plugins/crawler_dir/__init__.py index 9c0ec0557..ae64f6e5c 100644 --- a/pyglossary/plugins/crawler_dir/__init__.py +++ b/pyglossary/plugins/crawler_dir/__init__.py @@ -1,28 +1,13 @@ # mypy: ignore-errors from __future__ import annotations -from hashlib import sha1 -from os import listdir, makedirs -from os.path import dirname, isdir, isfile, join, splitext -from typing import TYPE_CHECKING - -from pyglossary.compression import ( - compressionOpenFunc, -) -from pyglossary.core import log from pyglossary.option import ( Option, StrOption, ) -from pyglossary.text_utils import ( - escapeNTB, - splitByBarUnescapeNTB, -) - -if TYPE_CHECKING: - from collections.abc import Generator, Iterator - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -56,147 +41,3 @@ comment="Compression Algorithm", ), } - - -class Writer: - _compression: str = "" - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = None - - def finish(self) -> None: - pass - - def open(self, filename: str) -> None: - self._filename = filename - if not isdir(filename): - makedirs(filename) - - @staticmethod - def filePathFromWord(b_word: bytes) -> str: - bw = b_word.lower() - if len(bw) <= 2: - return bw.hex() - if len(bw) <= 4: - return join( - bw[:2].hex() + ".d", - bw[2:].hex(), - ) - return join( - bw[:2].hex() + ".d", - bw[2:4].hex() + ".d", - bw[4:8].hex() + "-" + sha1(b_word).hexdigest()[:8], # noqa: S324 - ) - - def write(self) -> None: - from pyglossary.json_utils import dataToPrettyJson - - filename = self._filename - - wordCount = 0 - compression = self._compression - c_open = compressionOpenFunc(compression) - if not c_open: - raise ValueError(f"invalid compression {compression!r}") - while True: - entry = yield - if entry is None: - break - if entry.isData(): - continue - fpath = join(filename, self.filePathFromWord(entry.b_word)) - if compression: - fpath = f"{fpath}.{compression}" - parentDir = dirname(fpath) - if not isdir(parentDir): - 
makedirs(parentDir) - if isfile(fpath): - log.warning(f"file exists: {fpath}") - fpath += f"-{sha1(entry.b_defi).hexdigest()[:4]}" # noqa: S324 - with c_open(fpath, "wt", encoding="utf-8") as _file: - _file.write( - f"{escapeNTB(entry.s_word)}\n{entry.defi}", - ) - wordCount += 1 - - with open( - join(filename, "info.json"), - mode="w", - encoding="utf-8", - ) as infoFile: - info = {} - info["name"] = self._glos.getInfo("name") - info["wordCount"] = wordCount - info |= self._glos.getExtraInfos(["name", "wordCount"]) - - infoFile.write(dataToPrettyJson(info)) - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = None - self._wordCount = 0 - - def open(self, filename: str) -> None: - from pyglossary.json_utils import jsonToData - - self._filename = filename - - with open(join(filename, "info.json"), encoding="utf-8") as infoFp: - info = jsonToData(infoFp.read()) - self._wordCount = info.pop("wordCount") - for key, value in info.items(): - self._glos.setInfo(key, value) - - def close(self) -> None: - pass - - def __len__(self) -> int: - return self._wordCount - - def _fromFile(self, fpath: str) -> EntryType: - _, ext = splitext(fpath) - c_open = compressionOpenFunc(ext.lstrip(".")) - if not c_open: - log.error(f"invalid extension {ext}") - c_open = open - with c_open(fpath, "rt", encoding="utf-8") as _file: - words = splitByBarUnescapeNTB(_file.readline().rstrip("\n")) - defi = _file.read() - return self._glos.newEntry(words, defi) - - @staticmethod - def _listdirSortKey(name: str) -> str: - name_nox, ext = splitext(name) - if ext == ".d": - return name - return name_nox - - def _readDir( - self, - dpath: str, - exclude: set[str] | None, - ) -> Generator[EntryType, None, None]: - children = listdir(dpath) - if exclude: - children = [name for name in children if name not in exclude] - children.sort(key=self._listdirSortKey) - for name in children: - cpath = join(dpath, name) - if isfile(cpath): - yield self._fromFile(cpath) - continue - if isdir(cpath): - yield from self._readDir(cpath, None) - continue - log.error(f"Not a file nor a directory: {cpath}") - - def __iter__(self) -> Iterator[EntryType]: - yield from self._readDir( - self._filename, - { - "info.json", - }, - ) diff --git a/pyglossary/plugins/crawler_dir/reader.py b/pyglossary/plugins/crawler_dir/reader.py new file mode 100644 index 000000000..9bb6b0369 --- /dev/null +++ b/pyglossary/plugins/crawler_dir/reader.py @@ -0,0 +1,88 @@ +# mypy: ignore-errors +from __future__ import annotations + +from os import listdir +from os.path import isdir, isfile, join, splitext +from typing import TYPE_CHECKING + +from pyglossary.compression import ( + compressionOpenFunc, +) +from pyglossary.core import log +from pyglossary.text_utils import ( + splitByBarUnescapeNTB, +) + +if TYPE_CHECKING: + from collections.abc import Generator, Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = None + self._wordCount = 0 + + def open(self, filename: str) -> None: + from pyglossary.json_utils import jsonToData + + self._filename = filename + + with open(join(filename, "info.json"), encoding="utf-8") as infoFp: + info = jsonToData(infoFp.read()) + self._wordCount = info.pop("wordCount") + for key, value in info.items(): + self._glos.setInfo(key, value) + + def close(self) -> None: + pass + + def __len__(self) -> int: + return self._wordCount + + def _fromFile(self, fpath: str) 
-> EntryType: + _, ext = splitext(fpath) + c_open = compressionOpenFunc(ext.lstrip(".")) + if not c_open: + log.error(f"invalid extension {ext}") + c_open = open + with c_open(fpath, "rt", encoding="utf-8") as _file: + words = splitByBarUnescapeNTB(_file.readline().rstrip("\n")) + defi = _file.read() + return self._glos.newEntry(words, defi) + + @staticmethod + def _listdirSortKey(name: str) -> str: + name_nox, ext = splitext(name) + if ext == ".d": + return name + return name_nox + + def _readDir( + self, + dpath: str, + exclude: set[str] | None, + ) -> Generator[EntryType, None, None]: + children = listdir(dpath) + if exclude: + children = [name for name in children if name not in exclude] + children.sort(key=self._listdirSortKey) + for name in children: + cpath = join(dpath, name) + if isfile(cpath): + yield self._fromFile(cpath) + continue + if isdir(cpath): + yield from self._readDir(cpath, None) + continue + log.error(f"Not a file nor a directory: {cpath}") + + def __iter__(self) -> Iterator[EntryType]: + yield from self._readDir( + self._filename, + { + "info.json", + }, + ) diff --git a/pyglossary/plugins/crawler_dir/writer.py b/pyglossary/plugins/crawler_dir/writer.py new file mode 100644 index 000000000..6171a341e --- /dev/null +++ b/pyglossary/plugins/crawler_dir/writer.py @@ -0,0 +1,93 @@ +# mypy: ignore-errors +from __future__ import annotations + +from hashlib import sha1 +from os import makedirs +from os.path import dirname, isdir, isfile, join +from typing import TYPE_CHECKING + +from pyglossary.compression import ( + compressionOpenFunc, +) +from pyglossary.core import log +from pyglossary.text_utils import ( + escapeNTB, +) + +if TYPE_CHECKING: + from pyglossary.glossary_types import GlossaryType + + +class Writer: + _compression: str = "" + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = None + + def finish(self) -> None: + pass + + def open(self, filename: str) -> None: + self._filename = filename + if not isdir(filename): + makedirs(filename) + + @staticmethod + def filePathFromWord(b_word: bytes) -> str: + bw = b_word.lower() + if len(bw) <= 2: + return bw.hex() + if len(bw) <= 4: + return join( + bw[:2].hex() + ".d", + bw[2:].hex(), + ) + return join( + bw[:2].hex() + ".d", + bw[2:4].hex() + ".d", + bw[4:8].hex() + "-" + sha1(b_word).hexdigest()[:8], # noqa: S324 + ) + + def write(self) -> None: + from pyglossary.json_utils import dataToPrettyJson + + filename = self._filename + + wordCount = 0 + compression = self._compression + c_open = compressionOpenFunc(compression) + if not c_open: + raise ValueError(f"invalid compression {compression!r}") + while True: + entry = yield + if entry is None: + break + if entry.isData(): + continue + fpath = join(filename, self.filePathFromWord(entry.b_word)) + if compression: + fpath = f"{fpath}.{compression}" + parentDir = dirname(fpath) + if not isdir(parentDir): + makedirs(parentDir) + if isfile(fpath): + log.warning(f"file exists: {fpath}") + fpath += f"-{sha1(entry.b_defi).hexdigest()[:4]}" # noqa: S324 + with c_open(fpath, "wt", encoding="utf-8") as _file: + _file.write( + f"{escapeNTB(entry.s_word)}\n{entry.defi}", + ) + wordCount += 1 + + with open( + join(filename, "info.json"), + mode="w", + encoding="utf-8", + ) as infoFile: + info = {} + info["name"] = self._glos.getInfo("name") + info["wordCount"] = wordCount + info |= self._glos.getExtraInfos(["name", "wordCount"]) + + infoFile.write(dataToPrettyJson(info)) diff --git a/pyglossary/plugins/csv_plugin/__init__.py 
b/pyglossary/plugins/csv_plugin/__init__.py index 1f9aebb29..36916b243 100644 --- a/pyglossary/plugins/csv_plugin/__init__.py +++ b/pyglossary/plugins/csv_plugin/__init__.py @@ -20,16 +20,7 @@ from __future__ import annotations import csv -import os -from os.path import isdir, join -from typing import TYPE_CHECKING, cast -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import log -from pyglossary.io_utils import nullTextIO from pyglossary.option import ( BoolOption, EncodingOption, @@ -37,11 +28,8 @@ Option, ) -if TYPE_CHECKING: - import io - from collections.abc import Generator, Iterable, Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -94,231 +82,3 @@ } csv.field_size_limit(0x7FFFFFFF) - - -class Reader: - compressions = stdCompressions - - _encoding: str = "utf-8" - _newline: str = "\n" - _delimiter: str = "," - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self.clear() - - def clear(self) -> None: - self._filename = "" - self._file: io.TextIOBase = nullTextIO - self._fileSize = 0 - self._leadingLinesCount = 0 - self._wordCount: int | None = None - self._pos = -1 - self._csvReader: Iterable[list[str]] | None = None - self._resDir = "" - self._resFileNames: list[str] = [] - self._bufferRow: list[str] | None = None - - def open( - self, - filename: str, - ) -> None: - from pyglossary.text_reader import TextFilePosWrapper - - self._filename = filename - cfile = cast( - "io.TextIOBase", - compressionOpen( - filename, - mode="rt", - encoding=self._encoding, - newline=self._newline, - ), - ) - - if self._glos.progressbar: - if cfile.seekable(): - cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - # self._glos.setInfo("input_file_size", f"{self._fileSize}") - else: - log.warning("CSV Reader: file is not seekable") - - self._file = TextFilePosWrapper(cfile, self._encoding) - self._csvReader = csv.reader( - self._file, - dialect="excel", - delimiter=self._delimiter, - ) - self._resDir = filename + "_res" - if isdir(self._resDir): - self._resFileNames = os.listdir(self._resDir) - else: - self._resDir = "" - self._resFileNames = [] - for row in self._csvReader: - if not row: - continue - if not row[0].startswith("#"): - self._bufferRow = row - break - if len(row) < 2: - log.error(f"invalid row: {row}") - continue - self._glos.setInfo(row[0].lstrip("#"), row[1]) - - def close(self) -> None: - if self._file: - try: - self._file.close() - except Exception: - log.exception("error while closing csv file") - self.clear() - - def __len__(self) -> int: - from pyglossary.file_utils import fileCountLines - - if self._wordCount is None: - if hasattr(self._file, "compression"): - return 0 - log.debug("Try not to use len(reader) as it takes extra time") - self._wordCount = fileCountLines(self._filename) - self._leadingLinesCount - return self._wordCount + len(self._resFileNames) - - def _iterRows(self) -> Iterator[list[str]]: - if self._csvReader is None: - raise RuntimeError("self._csvReader is None") - if self._bufferRow: - yield self._bufferRow - yield from self._csvReader - - def _processRow(self, row: list[str]) -> EntryType | None: - if not row: - return None - - word: str | list[str] - try: - word = row[0] - defi = row[1] - except IndexError: - log.error(f"invalid row: {row!r}") - return None - - try: - alts = row[2].split(",") - except IndexError: - pass - else: - word = [word] + alts - - return 
self._glos.newEntry( - word, - defi, - byteProgress=( - (self._file.tell(), self._fileSize) if self._fileSize else None - ), - ) - - def __iter__(self) -> Iterator[EntryType | None]: - if not self._csvReader: - raise RuntimeError("iterating over a reader while it's not open") - - wordCount = 0 - for row in self._iterRows(): - wordCount += 1 - yield self._processRow(row) - - self._wordCount = wordCount - - resDir = self._resDir - for fname in self._resFileNames: - with open(join(resDir, fname), "rb") as _file: - yield self._glos.newDataEntry( - fname, - _file.read(), - ) - - -class Writer: - compressions = stdCompressions - - _encoding: str = "utf-8" - _newline: str = "\n" - _resources: bool = True - _delimiter: str = "," - _add_defi_format: bool = False - _enable_info: bool = True - _word_title: bool = False - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._file: io.TextIOBase = nullTextIO - - def open(self, filename: str) -> None: - self._filename = filename - self._file = cast( - "io.TextIOBase", - compressionOpen( - filename, - mode="wt", - encoding=self._encoding, - newline=self._newline, - ), - ) - self._resDir = resDir = filename + "_res" - self._csvWriter = csv.writer( - self._file, - dialect="excel", - quoting=csv.QUOTE_ALL, # FIXME - delimiter=self._delimiter, - ) - if not isdir(resDir): - os.mkdir(resDir) - if self._enable_info: - for key, value in self._glos.iterInfo(): - self._csvWriter.writerow([f"#{key}", value]) - - def finish(self) -> None: - self._filename = "" - self._file.close() - self._file = nullTextIO - if not os.listdir(self._resDir): - os.rmdir(self._resDir) - - def write(self) -> Generator[None, EntryType, None]: - resources = self._resources - add_defi_format = self._add_defi_format - glos = self._glos - resDir = self._resDir - writer = self._csvWriter - word_title = self._word_title - while True: - entry = yield - if entry is None: - break - if entry.isData(): - if resources: - entry.save(resDir) - continue - - words = entry.l_word - if not words: - continue - word, alts = words[0], words[1:] - defi = entry.defi - - if word_title: - defi = glos.wordTitleStr(words[0]) + defi - - row = [ - word, - defi, - ] - if add_defi_format: - entry.detectDefiFormat() - row.append(entry.defiFormat) - if alts: - row.append(",".join(alts)) - - writer.writerow(row) diff --git a/pyglossary/plugins/csv_plugin/reader.py b/pyglossary/plugins/csv_plugin/reader.py new file mode 100644 index 000000000..8087e9e92 --- /dev/null +++ b/pyglossary/plugins/csv_plugin/reader.py @@ -0,0 +1,182 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2013-2019 Saeed Rasooli (ilius) +# This file is part of PyGlossary project, https://github.com/ilius/pyglossary +# +# This program is a free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. Or on Debian systems, from /usr/share/common-licenses/GPL +# If not, see . 
+ +from __future__ import annotations + +import csv +import os +from os.path import isdir, join +from typing import TYPE_CHECKING, cast + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import log +from pyglossary.io_utils import nullTextIO + +if TYPE_CHECKING: + import io + from collections.abc import Iterable, Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + compressions = stdCompressions + + _encoding: str = "utf-8" + _newline: str = "\n" + _delimiter: str = "," + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self.clear() + + def clear(self) -> None: + self._filename = "" + self._file: io.TextIOBase = nullTextIO + self._fileSize = 0 + self._leadingLinesCount = 0 + self._wordCount: int | None = None + self._pos = -1 + self._csvReader: Iterable[list[str]] | None = None + self._resDir = "" + self._resFileNames: list[str] = [] + self._bufferRow: list[str] | None = None + + def open( + self, + filename: str, + ) -> None: + from pyglossary.text_reader import TextFilePosWrapper + + self._filename = filename + cfile = cast( + "io.TextIOBase", + compressionOpen( + filename, + mode="rt", + encoding=self._encoding, + newline=self._newline, + ), + ) + + if self._glos.progressbar: + if cfile.seekable(): + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + # self._glos.setInfo("input_file_size", f"{self._fileSize}") + else: + log.warning("CSV Reader: file is not seekable") + + self._file = TextFilePosWrapper(cfile, self._encoding) + self._csvReader = csv.reader( + self._file, + dialect="excel", + delimiter=self._delimiter, + ) + self._resDir = filename + "_res" + if isdir(self._resDir): + self._resFileNames = os.listdir(self._resDir) + else: + self._resDir = "" + self._resFileNames = [] + for row in self._csvReader: + if not row: + continue + if not row[0].startswith("#"): + self._bufferRow = row + break + if len(row) < 2: + log.error(f"invalid row: {row}") + continue + self._glos.setInfo(row[0].lstrip("#"), row[1]) + + def close(self) -> None: + if self._file: + try: + self._file.close() + except Exception: + log.exception("error while closing csv file") + self.clear() + + def __len__(self) -> int: + from pyglossary.file_utils import fileCountLines + + if self._wordCount is None: + if hasattr(self._file, "compression"): + return 0 + log.debug("Try not to use len(reader) as it takes extra time") + self._wordCount = fileCountLines(self._filename) - self._leadingLinesCount + return self._wordCount + len(self._resFileNames) + + def _iterRows(self) -> Iterator[list[str]]: + if self._csvReader is None: + raise RuntimeError("self._csvReader is None") + if self._bufferRow: + yield self._bufferRow + yield from self._csvReader + + def _processRow(self, row: list[str]) -> EntryType | None: + if not row: + return None + + word: str | list[str] + try: + word = row[0] + defi = row[1] + except IndexError: + log.error(f"invalid row: {row!r}") + return None + + try: + alts = row[2].split(",") + except IndexError: + pass + else: + word = [word] + alts + + return self._glos.newEntry( + word, + defi, + byteProgress=( + (self._file.tell(), self._fileSize) if self._fileSize else None + ), + ) + + def __iter__(self) -> Iterator[EntryType | None]: + if not self._csvReader: + raise RuntimeError("iterating over a reader while it's not open") + + wordCount = 0 + for row in self._iterRows(): + wordCount += 1 + yield self._processRow(row) + + self._wordCount = wordCount + + resDir = 
self._resDir + for fname in self._resFileNames: + with open(join(resDir, fname), "rb") as _file: + yield self._glos.newDataEntry( + fname, + _file.read(), + ) diff --git a/pyglossary/plugins/csv_plugin/writer.py b/pyglossary/plugins/csv_plugin/writer.py new file mode 100644 index 000000000..ff1c42920 --- /dev/null +++ b/pyglossary/plugins/csv_plugin/writer.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2013-2019 Saeed Rasooli (ilius) +# This file is part of PyGlossary project, https://github.com/ilius/pyglossary +# +# This program is a free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. Or on Debian systems, from /usr/share/common-licenses/GPL +# If not, see . + +from __future__ import annotations + +import csv +import os +from os.path import isdir +from typing import TYPE_CHECKING, cast + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.io_utils import nullTextIO + +if TYPE_CHECKING: + import io + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + compressions = stdCompressions + + _encoding: str = "utf-8" + _newline: str = "\n" + _resources: bool = True + _delimiter: str = "," + _add_defi_format: bool = False + _enable_info: bool = True + _word_title: bool = False + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._file: io.TextIOBase = nullTextIO + + def open(self, filename: str) -> None: + self._filename = filename + self._file = cast( + "io.TextIOBase", + compressionOpen( + filename, + mode="wt", + encoding=self._encoding, + newline=self._newline, + ), + ) + self._resDir = resDir = filename + "_res" + self._csvWriter = csv.writer( + self._file, + dialect="excel", + quoting=csv.QUOTE_ALL, # FIXME + delimiter=self._delimiter, + ) + if not isdir(resDir): + os.mkdir(resDir) + if self._enable_info: + for key, value in self._glos.iterInfo(): + self._csvWriter.writerow([f"#{key}", value]) + + def finish(self) -> None: + self._filename = "" + self._file.close() + self._file = nullTextIO + if not os.listdir(self._resDir): + os.rmdir(self._resDir) + + def write(self) -> Generator[None, EntryType, None]: + resources = self._resources + add_defi_format = self._add_defi_format + glos = self._glos + resDir = self._resDir + writer = self._csvWriter + word_title = self._word_title + while True: + entry = yield + if entry is None: + break + if entry.isData(): + if resources: + entry.save(resDir) + continue + + words = entry.l_word + if not words: + continue + word, alts = words[0], words[1:] + defi = entry.defi + + if word_title: + defi = glos.wordTitleStr(words[0]) + defi + + row = [ + word, + defi, + ] + if add_defi_format: + entry.detectDefiFormat() + row.append(entry.defiFormat) + if alts: + row.append(",".join(alts)) + + writer.writerow(row) diff --git a/pyglossary/plugins/dicformids/__init__.py b/pyglossary/plugins/dicformids/__init__.py index 625b9b7f3..8e1f4ca76 100644 --- a/pyglossary/plugins/dicformids/__init__.py +++ 
b/pyglossary/plugins/dicformids/__init__.py @@ -2,22 +2,16 @@ # mypy: ignore-errors from __future__ import annotations -import operator -import os -import re -from os.path import join from typing import TYPE_CHECKING -from pyglossary.core import log -from pyglossary.flags import ALWAYS -from pyglossary.plugins.tabfile import Reader as TabfileReader - if TYPE_CHECKING: - from collections.abc import Generator, Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option +from pyglossary.flags import ALWAYS + +from .reader import Reader +from .writer import Writer + __all__ = [ "Reader", "Writer", @@ -52,243 +46,3 @@ ) optionsProp: dict[str, Option] = {} - - -PROP_TEMPLATE = """#DictionaryForMIDs property file -infoText={name}, author: {author} -indexFileMaxSize={indexFileMaxSize}\n -language1IndexNumberOfSourceEntries={wordCount} -language1DictionaryUpdateClassName=de.kugihan.dictionaryformids.dictgen.DictionaryUpdate -indexCharEncoding=ISO-8859-1 -dictionaryFileSeparationCharacter='\\t' -language2NormationClassName=de.kugihan.dictionaryformids.translation.Normation -language2DictionaryUpdateClassName=de.kugihan.dictionaryformids.dictgen.DictionaryUpdate -logLevel=0 -language1FilePostfix={directoryPostfix} -dictionaryCharEncoding=UTF-8 -numberOfAvailableLanguages=2 -language1IsSearchable=true -language2GenerateIndex=false -dictionaryFileMaxSize={dicMaxSize} -language2FilePostfix={language2FilePostfix} -searchListFileMaxSize=20000 -language2IsSearchable=false -fileEncodingFormat=plain_format1 -language1HasSeparateDictionaryFile=true -searchListCharEncoding=ISO-8859-1 -searchListFileSeparationCharacter='\t' -indexFileSeparationCharacter='\t' -language1DisplayText={sourceLang} -language2HasSeparateDictionaryFile=false -dictionaryGenerationInputCharEncoding=UTF-8 -language1GenerateIndex=true -language2DisplayText={targetLang} -language1NormationClassName=de.kugihan.dictionaryformids.translation.NormationEng -""" - - -class Reader: - re_number = re.compile(r"\d+") - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._tabFileNames: list[str] = [] - self._tabFileReader = None - - def open(self, dirname: str) -> None: - self._dirname = dirname - orderFileNames: list[tuple[int, str]] = [] - for fname in os.listdir(dirname): - if not fname.startswith("directory"): - continue - try: - num = self.re_number.findall(fname)[-1] - except IndexError: - pass - else: - orderFileNames.append((num, fname)) - orderFileNames.sort( - key=operator.itemgetter(0), - reverse=True, - ) - self._tabFileNames = [x[1] for x in orderFileNames] - self.nextTabFile() - - def __len__(self) -> int: - raise NotImplementedError # FIXME - - def __iter__(self) -> Iterator[EntryType]: - return self - - def __next__(self) -> EntryType: - for _ in range(10): - try: - return next(self._tabFileReader) - except StopIteration: # noqa: PERF203 - self._tabFileReader.close() - self.nextTabFile() - return None - - def nextTabFile(self) -> None: - try: - tabFileName = self._tabFileNames.pop() - except IndexError: - raise StopIteration from None - self._tabFileReader = TabfileReader(self._glos, hasInfo=False) - self._tabFileReader.open(join(self._dirname, tabFileName), newline="\n") - - def close(self) -> None: - if self._tabFileReader: - try: - self._tabFileReader.close() - except Exception: - pass # noqa: S110 - self._tabFileReader = None - self._tabFileNames = [] - - -class Writer: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - 
self.linesPerDirectoryFile = 500 # 200 - self.indexFileMaxSize = 32722 # 30000 - self.directoryPostfix = "" - self.indexPostfix = "" - self._dirname = "" - # looks like we need to remove tabs, because app gives error - # but based on the java code, all punctuations should be removed - # as well, including '|' - self.re_punc = re.compile( - r"""[!"$§%&/()=?´`\\{}\[\]^°+*~#'\-_.:,;<>@|]*""", # noqa: RUF001 - ) - self.re_spaces = re.compile(" +") - self.re_tabs = re.compile("\t+") - - def normateWord(self, word: str) -> str: - word = word.strip() - word = self.re_punc.sub("", word) - word = self.re_spaces.sub(" ", word) - word = self.re_tabs.sub(" ", word) - word = word.lower() - return word # noqa: RET504 - - def writeProbs(self) -> None: - glos = self._glos - probsPath = join( - self._dirname, - "DictionaryForMIDs.properties", - ) - with open(probsPath, mode="w", newline="\n", encoding="utf-8") as fileObj: - fileObj.write( - PROP_TEMPLATE.format( - name=glos.getInfo("name"), - author=glos.author, - indexFileMaxSize=self.indexFileMaxSize, - wordCount=self.wordCount, - directoryPostfix=self.directoryPostfix, - dicMaxSize=self.dicMaxSize + 1, - language2FilePostfix="fa", # FIXME - sourceLang=glos.sourceLangName, - targetLang=glos.targetLangName, - ), - ) - - def nextIndex(self) -> None: - try: - self.indexFp.close() - except AttributeError: - self.indexIndex = 0 - - self.indexIndex += 1 - fname = f"index{self.indexPostfix}{self.indexIndex}.csv" - fpath = join(self._dirname, fname) - self.indexFp = open(fpath, mode="w", encoding="utf-8", newline="\n") - - def finish(self) -> None: - pass - - def open(self, dirname: str) -> None: - self._dirname = dirname - if not os.path.isdir(dirname): - os.mkdir(dirname) - - def write(self) -> Generator[None, EntryType, None]: - self.nextIndex() - - dicMaxSize = 0 - indexData: list[tuple[str, int, int]] = [] - - def writeBucket(dicIndex: int, entryList: list[EntryType]) -> None: - nonlocal dicMaxSize - log.debug( - f"{dicIndex=}, {len(entryList)=}, {dicMaxSize=}", - ) - dicFp = open( - join( - self._dirname, - f"directory{self.directoryPostfix}{dicIndex + 1}.csv", - ), - mode="w", - encoding="utf-8", - newline="\n", - ) - for entry in entryList: - word = entry.s_word - n_word = self.normateWord(word) - defi = entry.defi - dicLine = word + "\t" + defi + "\n" - dicPos = dicFp.tell() - dicFp.write(dicLine) - indexData.append((n_word, dicIndex + 1, dicPos)) - - dicMaxSize = max(dicMaxSize, dicFp.tell()) - dicFp.close() - - bucketSize = self.linesPerDirectoryFile - wordCount = 0 - dicIndex = 0 - entryList: list[EntryType] = [] # aka bucket - while True: - entry = yield - if entry is None: - break - if entry.isData(): - # FIXME - continue - wordCount += 1 - entryList.append(entry) - if len(entryList) >= bucketSize: - writeBucket(dicIndex, entryList) - dicIndex += 1 - entryList = [] - - if entryList: - writeBucket(dicIndex, entryList) - entryList = [] - - self.dicMaxSize = dicMaxSize - self.wordCount = wordCount - - langSearchListFp = open( - join( - self._dirname, - f"searchlist{self.directoryPostfix}.csv", - ), - mode="w", - newline="\n", - encoding="utf-8", - ) - - langSearchListFp.write(f"{indexData[0][0]}\t{self.indexIndex}\n") - - for word, dicIndex, dicPos in indexData: - indexLine = f"{word}\t{dicIndex}-{dicPos}-B\n" - if (self.indexFp.tell() + len(indexLine)) > self.indexFileMaxSize - 10: - self.nextIndex() - langSearchListFp.write(f"{word}\t{self.indexIndex}\n") - self.indexFp.write(indexLine) - - self.indexFp.close() - langSearchListFp.close() - - 
self.writeProbs() diff --git a/pyglossary/plugins/dicformids/reader.py b/pyglossary/plugins/dicformids/reader.py new file mode 100644 index 000000000..9ae2bd1a8 --- /dev/null +++ b/pyglossary/plugins/dicformids/reader.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# mypy: ignore-errors +from __future__ import annotations + +import operator +import os +import re +from os.path import join +from typing import TYPE_CHECKING + +from pyglossary.plugins.tabfile import Reader as TabfileReader + +if TYPE_CHECKING: + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + re_number = re.compile(r"\d+") + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._tabFileNames: list[str] = [] + self._tabFileReader = None + + def open(self, dirname: str) -> None: + self._dirname = dirname + orderFileNames: list[tuple[int, str]] = [] + for fname in os.listdir(dirname): + if not fname.startswith("directory"): + continue + try: + num = self.re_number.findall(fname)[-1] + except IndexError: + pass + else: + orderFileNames.append((num, fname)) + orderFileNames.sort( + key=operator.itemgetter(0), + reverse=True, + ) + self._tabFileNames = [x[1] for x in orderFileNames] + self.nextTabFile() + + def __len__(self) -> int: + raise NotImplementedError # FIXME + + def __iter__(self) -> Iterator[EntryType]: + return self + + def __next__(self) -> EntryType: + for _ in range(10): + try: + return next(self._tabFileReader) + except StopIteration: # noqa: PERF203 + self._tabFileReader.close() + self.nextTabFile() + return None + + def nextTabFile(self) -> None: + try: + tabFileName = self._tabFileNames.pop() + except IndexError: + raise StopIteration from None + self._tabFileReader = TabfileReader(self._glos, hasInfo=False) + self._tabFileReader.open(join(self._dirname, tabFileName), newline="\n") + + def close(self) -> None: + if self._tabFileReader: + try: + self._tabFileReader.close() + except Exception: + pass # noqa: S110 + self._tabFileReader = None + self._tabFileNames = [] diff --git a/pyglossary/plugins/dicformids/writer.py b/pyglossary/plugins/dicformids/writer.py new file mode 100644 index 000000000..44dc07ebd --- /dev/null +++ b/pyglossary/plugins/dicformids/writer.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +# mypy: ignore-errors +from __future__ import annotations + +import os +import re +from os.path import join +from typing import TYPE_CHECKING + +from pyglossary.core import log + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +PROP_TEMPLATE = """#DictionaryForMIDs property file +infoText={name}, author: {author} +indexFileMaxSize={indexFileMaxSize}\n +language1IndexNumberOfSourceEntries={wordCount} +language1DictionaryUpdateClassName=de.kugihan.dictionaryformids.dictgen.DictionaryUpdate +indexCharEncoding=ISO-8859-1 +dictionaryFileSeparationCharacter='\\t' +language2NormationClassName=de.kugihan.dictionaryformids.translation.Normation +language2DictionaryUpdateClassName=de.kugihan.dictionaryformids.dictgen.DictionaryUpdate +logLevel=0 +language1FilePostfix={directoryPostfix} +dictionaryCharEncoding=UTF-8 +numberOfAvailableLanguages=2 +language1IsSearchable=true +language2GenerateIndex=false +dictionaryFileMaxSize={dicMaxSize} +language2FilePostfix={language2FilePostfix} +searchListFileMaxSize=20000 +language2IsSearchable=false +fileEncodingFormat=plain_format1 +language1HasSeparateDictionaryFile=true +searchListCharEncoding=ISO-8859-1 
+searchListFileSeparationCharacter='\t' +indexFileSeparationCharacter='\t' +language1DisplayText={sourceLang} +language2HasSeparateDictionaryFile=false +dictionaryGenerationInputCharEncoding=UTF-8 +language1GenerateIndex=true +language2DisplayText={targetLang} +language1NormationClassName=de.kugihan.dictionaryformids.translation.NormationEng +""" + + +class Writer: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self.linesPerDirectoryFile = 500 # 200 + self.indexFileMaxSize = 32722 # 30000 + self.directoryPostfix = "" + self.indexPostfix = "" + self._dirname = "" + # looks like we need to remove tabs, because app gives error + # but based on the java code, all punctuations should be removed + # as well, including '|' + self.re_punc = re.compile( + r"""[!"$§%&/()=?´`\\{}\[\]^°+*~#'\-_.:,;<>@|]*""", # noqa: RUF001 + ) + self.re_spaces = re.compile(" +") + self.re_tabs = re.compile("\t+") + + def normateWord(self, word: str) -> str: + word = word.strip() + word = self.re_punc.sub("", word) + word = self.re_spaces.sub(" ", word) + word = self.re_tabs.sub(" ", word) + word = word.lower() + return word # noqa: RET504 + + def writeProbs(self) -> None: + glos = self._glos + probsPath = join( + self._dirname, + "DictionaryForMIDs.properties", + ) + with open(probsPath, mode="w", newline="\n", encoding="utf-8") as fileObj: + fileObj.write( + PROP_TEMPLATE.format( + name=glos.getInfo("name"), + author=glos.author, + indexFileMaxSize=self.indexFileMaxSize, + wordCount=self.wordCount, + directoryPostfix=self.directoryPostfix, + dicMaxSize=self.dicMaxSize + 1, + language2FilePostfix="fa", # FIXME + sourceLang=glos.sourceLangName, + targetLang=glos.targetLangName, + ), + ) + + def nextIndex(self) -> None: + try: + self.indexFp.close() + except AttributeError: + self.indexIndex = 0 + + self.indexIndex += 1 + fname = f"index{self.indexPostfix}{self.indexIndex}.csv" + fpath = join(self._dirname, fname) + self.indexFp = open(fpath, mode="w", encoding="utf-8", newline="\n") + + def finish(self) -> None: + pass + + def open(self, dirname: str) -> None: + self._dirname = dirname + if not os.path.isdir(dirname): + os.mkdir(dirname) + + def write(self) -> Generator[None, EntryType, None]: + self.nextIndex() + + dicMaxSize = 0 + indexData: list[tuple[str, int, int]] = [] + + def writeBucket(dicIndex: int, entryList: list[EntryType]) -> None: + nonlocal dicMaxSize + log.debug( + f"{dicIndex=}, {len(entryList)=}, {dicMaxSize=}", + ) + dicFp = open( + join( + self._dirname, + f"directory{self.directoryPostfix}{dicIndex + 1}.csv", + ), + mode="w", + encoding="utf-8", + newline="\n", + ) + for entry in entryList: + word = entry.s_word + n_word = self.normateWord(word) + defi = entry.defi + dicLine = word + "\t" + defi + "\n" + dicPos = dicFp.tell() + dicFp.write(dicLine) + indexData.append((n_word, dicIndex + 1, dicPos)) + + dicMaxSize = max(dicMaxSize, dicFp.tell()) + dicFp.close() + + bucketSize = self.linesPerDirectoryFile + wordCount = 0 + dicIndex = 0 + entryList: list[EntryType] = [] # aka bucket + while True: + entry = yield + if entry is None: + break + if entry.isData(): + # FIXME + continue + wordCount += 1 + entryList.append(entry) + if len(entryList) >= bucketSize: + writeBucket(dicIndex, entryList) + dicIndex += 1 + entryList = [] + + if entryList: + writeBucket(dicIndex, entryList) + entryList = [] + + self.dicMaxSize = dicMaxSize + self.wordCount = wordCount + + langSearchListFp = open( + join( + self._dirname, + f"searchlist{self.directoryPostfix}.csv", + ), + mode="w", + 
newline="\n", + encoding="utf-8", + ) + + langSearchListFp.write(f"{indexData[0][0]}\t{self.indexIndex}\n") + + for word, dicIndex, dicPos in indexData: + indexLine = f"{word}\t{dicIndex}-{dicPos}-B\n" + if (self.indexFp.tell() + len(indexLine)) > self.indexFileMaxSize - 10: + self.nextIndex() + langSearchListFp.write(f"{word}\t{self.indexIndex}\n") + self.indexFp.write(indexLine) + + self.indexFp.close() + langSearchListFp.close() + + self.writeProbs() diff --git a/pyglossary/plugins/dict_cc/__init__.py b/pyglossary/plugins/dict_cc/__init__.py index 9105a963e..c75ec3d64 100644 --- a/pyglossary/plugins/dict_cc/__init__.py +++ b/pyglossary/plugins/dict_cc/__init__.py @@ -1,20 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import html -from operator import itemgetter -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING if TYPE_CHECKING: - import sqlite3 - from collections.abc import Callable, Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.lxml_types import Element, T_htmlfile from pyglossary.option import Option -from pyglossary.core import log +from .reader import Reader __all__ = [ "Reader", @@ -45,192 +38,3 @@ "dict.cc dictionary - Google Play", ) optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - self._con = connect(filename) - self._cur = self._con.cursor() - self._glos.setDefaultDefiFormat("h") - - def __len__(self) -> int: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute( - "select count(distinct term1)+count(distinct term2) from main_ft", - ) - return self._cur.fetchone()[0] - - @staticmethod - def makeList( - hf: T_htmlfile, - input_elements: list[Element], - processor: Callable, - single_prefix: str = "", - skip_single: bool = True, - ) -> None: - """Wrap elements into
<ol>
        if more than one element.""" - if not input_elements: - return - - if skip_single and len(input_elements) == 1: - hf.write(single_prefix) - processor(hf, input_elements[0]) - return - - with hf.element("ol"): - for el in input_elements: - with hf.element("li"): - processor(hf, el) - - @staticmethod - def makeGroupsList( - hf: T_htmlfile, - groups: list[tuple[str, str]], - processor: Callable[[T_htmlfile, tuple[str, str]], None], - single_prefix: str = "", - skip_single: bool = True, - ) -> None: - """Wrap elements into
<ol>
          if more than one element.""" - if not groups: - return - - if skip_single and len(groups) == 1: - hf.write(single_prefix) - processor(hf, groups[0]) - return - - with hf.element("ol"): - for el in groups: - with hf.element("li"): - processor(hf, el) - - def writeSense( # noqa: PLR6301 - self, - hf: T_htmlfile, - row: tuple[str, str], - ) -> None: - from lxml import etree as ET - - trans, entry_type = row - if entry_type: - with hf.element("i"): - hf.write(f"{entry_type}") # noqa: FURB183 - hf.write(ET.Element("br")) - try: - hf.write(trans + " ") - except Exception as e: - log.error(f"error in writing {trans!r}, {e}") - hf.write(repr(trans) + " ") - else: - with hf.element("big"): - with hf.element("a", href=f"bword://{trans}"): - hf.write("⏎") - - def iterRows( - self, - column1: str, - column2: str, - ) -> Iterator[tuple[str, str, str]]: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute( - f"select {column1}, {column2}, entry_type from main_ft" - f" order by {column1}", - ) - for row in self._cur.fetchall(): - term1 = row[0] - term2 = row[1] - try: - term1 = html.unescape(term1) - except Exception as e: - log.error(f"html.unescape({term1!r}) -> {e}") - try: - term2 = html.unescape(term2) - except Exception as e: - log.error(f"html.unescape({term2!r}) -> {e}") - yield term1, term2, row[2] - - def parseGender(self, headword: str) -> tuple[str | None, str]: # noqa: PLR6301 - # {m} masc masculine German: maskulin - # {f} fem feminine German: feminin - # {n} neut neutral German: neutral - # { } ???? - i = headword.find(" {") - if i <= 0: - return None, headword - if len(headword) < i + 4: - return None, headword - if headword[i + 3] != "}": - return None, headword - g = headword[i + 2] - gender = None - if g == "m": - gender = "masculine" - elif g == "f": - gender = "feminine" - elif g == "n": - gender = "neutral" - else: - log.warning(f"invalid gender {g!r}") - return None, headword - headword = headword[:i] + headword[i + 4 :] - return gender, headword - - def _iterOneDirection( - self, - column1: str, - column2: str, - ) -> Iterator[EntryType]: - from io import BytesIO - from itertools import groupby - - from lxml import etree as ET - - glos = self._glos - for headwordEscaped, groupsOrig in groupby( - self.iterRows(column1, column2), - key=itemgetter(0), - ): - headword = html.unescape(headwordEscaped) - groups: list[tuple[str, str]] = [ - (term2, entry_type) for _, term2, entry_type in groupsOrig - ] - f = BytesIO() - gender, headword = self.parseGender(headword) - with ET.htmlfile(f, encoding="utf-8") as hf: - with hf.element("div"): - if gender: - with hf.element("i"): - hf.write(gender) - hf.write(ET.Element("br")) - self.makeGroupsList( - cast("T_htmlfile", hf), - groups, - self.writeSense, - ) - defi = f.getvalue().decode("utf-8") - yield glos.newEntry(headword, defi, defiFormat="h") - - def __iter__(self) -> Iterator[EntryType]: - yield from self._iterOneDirection("term1", "term2") - yield from self._iterOneDirection("term2", "term1") - - def close(self) -> None: - if self._cur: - self._cur.close() - if self._con: - self._con.close() - self._clear() diff --git a/pyglossary/plugins/dict_cc/reader.py b/pyglossary/plugins/dict_cc/reader.py new file mode 100644 index 000000000..e6615604a --- /dev/null +++ b/pyglossary/plugins/dict_cc/reader.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import html +from operator import itemgetter +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import sqlite3 + 
from collections.abc import Callable, Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.lxml_types import Element, T_htmlfile + + +from pyglossary.core import log + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + self._con = connect(filename) + self._cur = self._con.cursor() + self._glos.setDefaultDefiFormat("h") + + def __len__(self) -> int: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute( + "select count(distinct term1)+count(distinct term2) from main_ft", + ) + return self._cur.fetchone()[0] + + @staticmethod + def makeList( + hf: T_htmlfile, + input_elements: list[Element], + processor: Callable, + single_prefix: str = "", + skip_single: bool = True, + ) -> None: + """Wrap elements into
<ol>
            if more than one element.""" + if not input_elements: + return + + if skip_single and len(input_elements) == 1: + hf.write(single_prefix) + processor(hf, input_elements[0]) + return + + with hf.element("ol"): + for el in input_elements: + with hf.element("li"): + processor(hf, el) + + @staticmethod + def makeGroupsList( + hf: T_htmlfile, + groups: list[tuple[str, str]], + processor: Callable[[T_htmlfile, tuple[str, str]], None], + single_prefix: str = "", + skip_single: bool = True, + ) -> None: + """Wrap elements into
<ol>
              if more than one element.""" + if not groups: + return + + if skip_single and len(groups) == 1: + hf.write(single_prefix) + processor(hf, groups[0]) + return + + with hf.element("ol"): + for el in groups: + with hf.element("li"): + processor(hf, el) + + def writeSense( # noqa: PLR6301 + self, + hf: T_htmlfile, + row: tuple[str, str], + ) -> None: + from lxml import etree as ET + + trans, entry_type = row + if entry_type: + with hf.element("i"): + hf.write(f"{entry_type}") # noqa: FURB183 + hf.write(ET.Element("br")) + try: + hf.write(trans + " ") + except Exception as e: + log.error(f"error in writing {trans!r}, {e}") + hf.write(repr(trans) + " ") + else: + with hf.element("big"): + with hf.element("a", href=f"bword://{trans}"): + hf.write("⏎") + + def iterRows( + self, + column1: str, + column2: str, + ) -> Iterator[tuple[str, str, str]]: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute( + f"select {column1}, {column2}, entry_type from main_ft" + f" order by {column1}", + ) + for row in self._cur.fetchall(): + term1 = row[0] + term2 = row[1] + try: + term1 = html.unescape(term1) + except Exception as e: + log.error(f"html.unescape({term1!r}) -> {e}") + try: + term2 = html.unescape(term2) + except Exception as e: + log.error(f"html.unescape({term2!r}) -> {e}") + yield term1, term2, row[2] + + def parseGender(self, headword: str) -> tuple[str | None, str]: # noqa: PLR6301 + # {m} masc masculine German: maskulin + # {f} fem feminine German: feminin + # {n} neut neutral German: neutral + # { } ???? + i = headword.find(" {") + if i <= 0: + return None, headword + if len(headword) < i + 4: + return None, headword + if headword[i + 3] != "}": + return None, headword + g = headword[i + 2] + gender = None + if g == "m": + gender = "masculine" + elif g == "f": + gender = "feminine" + elif g == "n": + gender = "neutral" + else: + log.warning(f"invalid gender {g!r}") + return None, headword + headword = headword[:i] + headword[i + 4 :] + return gender, headword + + def _iterOneDirection( + self, + column1: str, + column2: str, + ) -> Iterator[EntryType]: + from io import BytesIO + from itertools import groupby + + from lxml import etree as ET + + glos = self._glos + for headwordEscaped, groupsOrig in groupby( + self.iterRows(column1, column2), + key=itemgetter(0), + ): + headword = html.unescape(headwordEscaped) + groups: list[tuple[str, str]] = [ + (term2, entry_type) for _, term2, entry_type in groupsOrig + ] + f = BytesIO() + gender, headword = self.parseGender(headword) + with ET.htmlfile(f, encoding="utf-8") as hf: + with hf.element("div"): + if gender: + with hf.element("i"): + hf.write(gender) + hf.write(ET.Element("br")) + self.makeGroupsList( + cast("T_htmlfile", hf), + groups, + self.writeSense, + ) + defi = f.getvalue().decode("utf-8") + yield glos.newEntry(headword, defi, defiFormat="h") + + def __iter__(self) -> Iterator[EntryType]: + yield from self._iterOneDirection("term1", "term2") + yield from self._iterOneDirection("term2", "term1") + + def close(self) -> None: + if self._cur: + self._cur.close() + if self._con: + self._con.close() + self._clear() diff --git a/pyglossary/plugins/dict_cc_split/__init__.py b/pyglossary/plugins/dict_cc_split/__init__.py index daa096949..69fbb799c 100644 --- a/pyglossary/plugins/dict_cc_split/__init__.py +++ b/pyglossary/plugins/dict_cc_split/__init__.py @@ -1,17 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import html from typing import TYPE_CHECKING if TYPE_CHECKING: - import sqlite3 
- from collections.abc import Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option -from pyglossary.core import log +from .reader import Reader __all__ = [ "Reader", @@ -42,73 +37,3 @@ "dict.cc dictionary - Google Play", ) optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - self._con = connect(filename) - self._cur = self._con.cursor() - self._glos.setDefaultDefiFormat("m") - - def __len__(self) -> int: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute("select count(*) * 2 from main_ft") - return self._cur.fetchone()[0] - - def iterRows( - self, - column1: str, - column2: str, - ) -> Iterator[tuple[str, str, str]]: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute( - f"select {column1}, {column2}, entry_type from main_ft" - f" order by {column1}", - ) - for row in self._cur.fetchall(): - term1 = row[0] - term2 = row[1] - try: - term1 = html.unescape(term1) - except Exception as e: - log.error(f"html.unescape({term1!r}) -> {e}") - try: - term2 = html.unescape(term2) - except Exception as e: - log.error(f"html.unescape({term2!r}) -> {e}") - yield term1, term2, row[2] - - def _iterOneDirection( - self, - column1: str, - column2: str, - ) -> Iterator[EntryType]: - for word, defi, entry_type in self.iterRows(column1, column2): - if entry_type: - word = f"{word} {{{entry_type}}}" # noqa: PLW2901 - yield self._glos.newEntry(word, defi, defiFormat="m") - - def __iter__(self) -> Iterator[EntryType]: - yield from self._iterOneDirection("term1", "term2") - yield from self._iterOneDirection("term2", "term1") - - def close(self) -> None: - if self._cur: - self._cur.close() - if self._con: - self._con.close() - self._clear() diff --git a/pyglossary/plugins/dict_cc_split/reader.py b/pyglossary/plugins/dict_cc_split/reader.py new file mode 100644 index 000000000..1e5205f28 --- /dev/null +++ b/pyglossary/plugins/dict_cc_split/reader.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import html +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import sqlite3 + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + +from pyglossary.core import log + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + self._con = connect(filename) + self._cur = self._con.cursor() + self._glos.setDefaultDefiFormat("m") + + def __len__(self) -> int: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute("select count(*) * 2 from main_ft") + return self._cur.fetchone()[0] + + def iterRows( + self, + column1: str, + column2: str, + ) -> Iterator[tuple[str, str, str]]: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute( + f"select {column1}, {column2}, entry_type from main_ft" + f" order by {column1}", + ) + for row in self._cur.fetchall(): + term1 = row[0] + 
term2 = row[1] + try: + term1 = html.unescape(term1) + except Exception as e: + log.error(f"html.unescape({term1!r}) -> {e}") + try: + term2 = html.unescape(term2) + except Exception as e: + log.error(f"html.unescape({term2!r}) -> {e}") + yield term1, term2, row[2] + + def _iterOneDirection( + self, + column1: str, + column2: str, + ) -> Iterator[EntryType]: + for word, defi, entry_type in self.iterRows(column1, column2): + if entry_type: + word = f"{word} {{{entry_type}}}" # noqa: PLW2901 + yield self._glos.newEntry(word, defi, defiFormat="m") + + def __iter__(self) -> Iterator[EntryType]: + yield from self._iterOneDirection("term1", "term2") + yield from self._iterOneDirection("term2", "term1") + + def close(self) -> None: + if self._cur: + self._cur.close() + if self._con: + self._con.close() + self._clear() diff --git a/pyglossary/plugins/dict_org/__init__.py b/pyglossary/plugins/dict_org/__init__.py index 8331d3adb..9af2bf0b3 100644 --- a/pyglossary/plugins/dict_org/__init__.py +++ b/pyglossary/plugins/dict_org/__init__.py @@ -2,20 +2,11 @@ from __future__ import annotations -import os -import re -from os.path import isdir, splitext -from typing import TYPE_CHECKING - -from pyglossary.core import log from pyglossary.flags import DEFAULT_NO from pyglossary.option import BoolOption, Option -from pyglossary.plugin_lib.dictdlib import DictDB - -if TYPE_CHECKING: - from collections.abc import Generator, Iterator - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -51,146 +42,3 @@ "http://dict.org/bin/Dict", "The DICT Development Group", ) - - -def installToDictd(filename: str, dictzip: bool) -> None: - """Filename is without extension (neither .index or .dict or .dict.dz).""" - import shutil - import subprocess - - targetDir = "/usr/share/dictd/" - if filename.startswith(targetDir): - return - - if not isdir(targetDir): - log.warning(f"Directory {targetDir!r} does not exist, skipping install") - return - - log.info(f"Installing {filename!r} to DICTD server directory: {targetDir}") - - if dictzip and os.path.isfile(filename + ".dict.dz"): - dictExt = ".dict.dz" - elif os.path.isfile(filename + ".dict"): - dictExt = ".dict" - else: - log.error(f"No .dict file, could not install dictd file {filename!r}") - return - - if not filename.startswith(targetDir): - shutil.copy(filename + ".index", targetDir) - shutil.copy(filename + dictExt, targetDir) - - # update /var/lib/dictd/db.list - if subprocess.call(["/usr/sbin/dictdconfig", "-w"]) != 0: - log.error( - "failed to update /var/lib/dictd/db.list file" - ", try manually running: sudo /usr/sbin/dictdconfig -w", - ) - - log.info("don't forget to restart dictd server") - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._dictdb: DictDB | None = None - - # regular expression patterns used to prettify definition text - self._re_newline_in_braces = re.compile( - r"\{(?P.*?)\n(?P.*?)?\}", - ) - self._re_words_in_braces = re.compile( - r"\{(?P.+?)\}", - ) - - def open(self, filename: str) -> None: - filename = filename.removesuffix(".index") - self._filename = filename - self._dictdb = DictDB(filename, "read", 1) - - def close(self) -> None: - if self._dictdb is not None: - self._dictdb.close() - # self._dictdb.finish() - self._dictdb = None - - def prettifyDefinitionText(self, defi: str) -> str: - # Handle words in {} - # First, we remove any \n in {} pairs - defi = 
self._re_newline_in_braces.sub(r"{\g\g}", defi) - - # Then, replace any {words} into words, - # so it can be rendered as link correctly - defi = self._re_words_in_braces.sub( - r'\g', - defi, - ) - - # Use <br/>
              so it can be rendered as newline correctly - return defi.replace("\n", "<br/>
              ") - - def __len__(self) -> int: - if self._dictdb is None: - return 0 - return len(self._dictdb) - - def __iter__(self) -> Iterator[EntryType]: - if self._dictdb is None: - raise RuntimeError("iterating over a reader while it's not open") - dictdb = self._dictdb - for word in dictdb.getDefList(): - b_defi = b"\n\n
              \n\n".join(dictdb.getDef(word)) - try: - defi = b_defi.decode("utf_8", "ignore") - defi = self.prettifyDefinitionText(defi) - except Exception as e: - log.error(f"{b_defi = }") - raise e - yield self._glos.newEntry(word, defi) - - -class Writer: - _dictzip: bool = False - _install: bool = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._dictdb: DictDB | None = None - - def finish(self) -> None: - from pyglossary.os_utils import runDictzip - - if self._dictdb is None: - raise RuntimeError("self._dictdb is None") - - self._dictdb.finish(dosort=True) - if self._dictzip: - runDictzip(f"{self._filename}.dict") - if self._install: - installToDictd( - self._filename, - self._dictzip, - ) - self._filename = "" - - def open(self, filename: str) -> None: - filename_nox, ext = splitext(filename) - if ext.lower() == ".index": - filename = filename_nox - self._dictdb = DictDB(filename, "write", 1) - self._filename = filename - - def write(self) -> Generator[None, EntryType, None]: - dictdb = self._dictdb - if dictdb is None: - raise RuntimeError("self._dictdb is None") - while True: - entry = yield - if entry is None: - break - if entry.isData(): - # does dictd support resources? and how? FIXME - continue - dictdb.addEntry(entry.defi, entry.l_word) diff --git a/pyglossary/plugins/dict_org/reader.py b/pyglossary/plugins/dict_org/reader.py new file mode 100644 index 000000000..71a47fc13 --- /dev/null +++ b/pyglossary/plugins/dict_org/reader.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +from pyglossary.core import log +from pyglossary.plugin_lib.dictdlib import DictDB + +if TYPE_CHECKING: + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._dictdb: DictDB | None = None + + # regular expression patterns used to prettify definition text + self._re_newline_in_braces = re.compile( + r"\{(?P.*?)\n(?P.*?)?\}", + ) + self._re_words_in_braces = re.compile( + r"\{(?P.+?)\}", + ) + + def open(self, filename: str) -> None: + filename = filename.removesuffix(".index") + self._filename = filename + self._dictdb = DictDB(filename, "read", 1) + + def close(self) -> None: + if self._dictdb is not None: + self._dictdb.close() + # self._dictdb.finish() + self._dictdb = None + + def prettifyDefinitionText(self, defi: str) -> str: + # Handle words in {} + # First, we remove any \n in {} pairs + defi = self._re_newline_in_braces.sub(r"{\g\g}", defi) + + # Then, replace any {words} into words, + # so it can be rendered as link correctly + defi = self._re_words_in_braces.sub( + r'\g', + defi, + ) + + # Use
              so it can be rendered as newline correctly + return defi.replace("\n", "<br/>
              ") + + def __len__(self) -> int: + if self._dictdb is None: + return 0 + return len(self._dictdb) + + def __iter__(self) -> Iterator[EntryType]: + if self._dictdb is None: + raise RuntimeError("iterating over a reader while it's not open") + dictdb = self._dictdb + for word in dictdb.getDefList(): + b_defi = b"\n\n
              \n\n".join(dictdb.getDef(word)) + try: + defi = b_defi.decode("utf_8", "ignore") + defi = self.prettifyDefinitionText(defi) + except Exception as e: + log.error(f"{b_defi = }") + raise e + yield self._glos.newEntry(word, defi) diff --git a/pyglossary/plugins/dict_org/writer.py b/pyglossary/plugins/dict_org/writer.py new file mode 100644 index 000000000..5cc2762e7 --- /dev/null +++ b/pyglossary/plugins/dict_org/writer.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from os.path import splitext +from typing import TYPE_CHECKING + +from pyglossary.core import log +from pyglossary.plugin_lib.dictdlib import DictDB + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +def installToDictd(filename: str, dictzip: bool) -> None: + """Filename is without extension (neither .index or .dict or .dict.dz).""" + import shutil + import subprocess + from os.path import isdir, isfile + + targetDir = "/usr/share/dictd/" + if filename.startswith(targetDir): + return + + if not isdir(targetDir): + log.warning(f"Directory {targetDir!r} does not exist, skipping install") + return + + log.info(f"Installing {filename!r} to DICTD server directory: {targetDir}") + + if dictzip and isfile(filename + ".dict.dz"): + dictExt = ".dict.dz" + elif isfile(filename + ".dict"): + dictExt = ".dict" + else: + log.error(f"No .dict file, could not install dictd file {filename!r}") + return + + if not filename.startswith(targetDir): + shutil.copy(filename + ".index", targetDir) + shutil.copy(filename + dictExt, targetDir) + + # update /var/lib/dictd/db.list + if subprocess.call(["/usr/sbin/dictdconfig", "-w"]) != 0: + log.error( + "failed to update /var/lib/dictd/db.list file" + ", try manually running: sudo /usr/sbin/dictdconfig -w", + ) + + log.info("don't forget to restart dictd server") + + +class Writer: + _dictzip: bool = False + _install: bool = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._dictdb: DictDB | None = None + + def finish(self) -> None: + from pyglossary.os_utils import runDictzip + + if self._dictdb is None: + raise RuntimeError("self._dictdb is None") + + self._dictdb.finish(dosort=True) + if self._dictzip: + runDictzip(f"{self._filename}.dict") + if self._install: + installToDictd( + self._filename, + self._dictzip, + ) + self._filename = "" + + def open(self, filename: str) -> None: + filename_nox, ext = splitext(filename) + if ext.lower() == ".index": + filename = filename_nox + self._dictdb = DictDB(filename, "write", 1) + self._filename = filename + + def write(self) -> Generator[None, EntryType, None]: + dictdb = self._dictdb + if dictdb is None: + raise RuntimeError("self._dictdb is None") + while True: + entry = yield + if entry is None: + break + if entry.isData(): + # does dictd support resources? and how? 
FIXME + continue + dictdb.addEntry(entry.defi, entry.l_word) diff --git a/pyglossary/plugins/dict_org_source/__init__.py b/pyglossary/plugins/dict_org_source/__init__.py index 5c899f1fe..9a9d63233 100644 --- a/pyglossary/plugins/dict_org_source/__init__.py +++ b/pyglossary/plugins/dict_org_source/__init__.py @@ -1,14 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from typing import TYPE_CHECKING - from pyglossary.option import BoolOption, Option -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .writer import Writer __all__ = [ "Writer", @@ -41,36 +36,3 @@ optionsProp: dict[str, Option] = { "remove_html_all": BoolOption(comment="Remove all HTML tags"), } - - -class Writer: - _remove_html_all: bool = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - - def finish(self) -> None: - self._filename = "" - - def open(self, filename: str) -> None: - self._filename = filename - if self._remove_html_all: - self._glos.removeHtmlTagsAll() - # TODO: add another bool flag to only remove html tags that are not - # supported by GtkTextView - - @staticmethod - def _defiEscapeFunc(defi: str) -> str: - return defi.replace("\r", "") - - def write(self) -> Generator[None, EntryType, None]: - from pyglossary.text_writer import writeTxt - - yield from writeTxt( - self._glos, - entryFmt=":{word}:{defi}\n", - filename=self._filename, - defiEscapeFunc=self._defiEscapeFunc, - ext=".dtxt", - ) diff --git a/pyglossary/plugins/dict_org_source/writer.py b/pyglossary/plugins/dict_org_source/writer.py new file mode 100644 index 000000000..1548f5975 --- /dev/null +++ b/pyglossary/plugins/dict_org_source/writer.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + _remove_html_all: bool = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + + def finish(self) -> None: + self._filename = "" + + def open(self, filename: str) -> None: + self._filename = filename + if self._remove_html_all: + self._glos.removeHtmlTagsAll() + # TODO: add another bool flag to only remove html tags that are not + # supported by GtkTextView + + @staticmethod + def _defiEscapeFunc(defi: str) -> str: + return defi.replace("\r", "") + + def write(self) -> Generator[None, EntryType, None]: + from pyglossary.text_writer import writeTxt + + yield from writeTxt( + self._glos, + entryFmt=":{word}:{defi}\n", + filename=self._filename, + defiEscapeFunc=self._defiEscapeFunc, + ext=".dtxt", + ) diff --git a/pyglossary/plugins/dictunformat/__init__.py b/pyglossary/plugins/dictunformat/__init__.py index a05c55459..77e5f8233 100644 --- a/pyglossary/plugins/dictunformat/__init__.py +++ b/pyglossary/plugins/dictunformat/__init__.py @@ -1,8 +1,8 @@ from __future__ import annotations -from pyglossary.core import log from pyglossary.option import EncodingOption, Option, StrOption -from pyglossary.text_reader import TextGlossaryReader + +from .reader import Reader __all__ = [ "Reader", @@ -38,89 +38,3 @@ comment="separator for headword and alternates", ), } - - -def unescapeDefi(defi: str) -> str: - return defi - - -class Reader(TextGlossaryReader): - _headword_separator = "; " - # https://github.com/cheusov/dictd/blob/master/dictfmt/dictunformat.in#L14 - - 
@classmethod - def isInfoWord(cls, word: str) -> bool: - return word.startswith("00-database-") - - @classmethod - def fixInfoWord(cls, word: str) -> str: - return word - - def setInfo(self, word: str, defi: str) -> None: - if word == "00-database-short": - self._glos.setInfo("name", defi) - return - - if word != "00-database-info": - return - - glos = self._glos - - lastKey = "" - for line in defi.split("\n"): - if not line.startswith("##:"): - if lastKey: - glos.setInfo(word, f"{glos.getInfo(lastKey)}\n{line}") - continue - - parts = line[3:].split(":") - if len(parts) < 2: - log.error(f"unexpected line: {line}") - key = lastKey = parts[0] - value = ":".join(parts[1:]) - glos.setInfo(key, value) - - def nextBlock(self) -> tuple[str | list[str], str, None] | None: - if not self._file: - raise StopIteration - word = "" - defiLines: list[str] = [] - - while True: - line = self.readline() - if not line: - break - line = line.rstrip("\n\r") - if not line: - continue - - if not line.strip("_"): - if not word: - continue - if not defiLines: - log.warning(f"no definition/value for {word!r}") - defi = unescapeDefi("\n".join(defiLines)) - words = word.split(self._headword_separator) - return words, defi, None - - if not word: - word = line - continue - - if line == word: - continue - if line.lower() == word: - word = line - continue - - defiLines.append(line) - - if word: - defi = unescapeDefi("\n".join(defiLines)) - if word.startswith("00-database-") and defi == "unknown": - log.info(f"ignoring {word} -> {defi}") - return None - words = word.split(self._headword_separator) - return words, defi, None - - raise StopIteration diff --git a/pyglossary/plugins/dictunformat/reader.py b/pyglossary/plugins/dictunformat/reader.py new file mode 100644 index 000000000..c66a0f937 --- /dev/null +++ b/pyglossary/plugins/dictunformat/reader.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +from pyglossary.core import log +from pyglossary.text_reader import TextGlossaryReader + + +def unescapeDefi(defi: str) -> str: + return defi + + +class Reader(TextGlossaryReader): + _headword_separator = "; " + # https://github.com/cheusov/dictd/blob/master/dictfmt/dictunformat.in#L14 + + @classmethod + def isInfoWord(cls, word: str) -> bool: + return word.startswith("00-database-") + + @classmethod + def fixInfoWord(cls, word: str) -> str: + return word + + def setInfo(self, word: str, defi: str) -> None: + if word == "00-database-short": + self._glos.setInfo("name", defi) + return + + if word != "00-database-info": + return + + glos = self._glos + + lastKey = "" + for line in defi.split("\n"): + if not line.startswith("##:"): + if lastKey: + glos.setInfo(word, f"{glos.getInfo(lastKey)}\n{line}") + continue + + parts = line[3:].split(":") + if len(parts) < 2: + log.error(f"unexpected line: {line}") + key = lastKey = parts[0] + value = ":".join(parts[1:]) + glos.setInfo(key, value) + + def nextBlock(self) -> tuple[str | list[str], str, None] | None: + if not self._file: + raise StopIteration + word = "" + defiLines: list[str] = [] + + while True: + line = self.readline() + if not line: + break + line = line.rstrip("\n\r") + if not line: + continue + + if not line.strip("_"): + if not word: + continue + if not defiLines: + log.warning(f"no definition/value for {word!r}") + defi = unescapeDefi("\n".join(defiLines)) + words = word.split(self._headword_separator) + return words, defi, None + + if not word: + word = line + continue + + if line == word: + continue + if line.lower() == word: + word = line + continue + + 
defiLines.append(line) + + if word: + defi = unescapeDefi("\n".join(defiLines)) + if word.startswith("00-database-") and defi == "unknown": + log.info(f"ignoring {word} -> {defi}") + return None + words = word.split(self._headword_separator) + return words, defi, None + + raise StopIteration diff --git a/pyglossary/plugins/digitalnk/__init__.py b/pyglossary/plugins/digitalnk/__init__.py index cf35cef73..08c23d4eb 100644 --- a/pyglossary/plugins/digitalnk/__init__.py +++ b/pyglossary/plugins/digitalnk/__init__.py @@ -1,16 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import html from typing import TYPE_CHECKING if TYPE_CHECKING: - import sqlite3 - from collections.abc import Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option +from .reader import Reader + __all__ = [ "Reader", "description", @@ -40,51 +37,3 @@ "@digitalprk/dicrs", ) optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - self._con = connect(filename) - self._cur = self._con.cursor() - self._glos.setDefaultDefiFormat("m") - - def __len__(self) -> int: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute("select count(*) from dictionary") - return self._cur.fetchone()[0] - - def __iter__(self) -> Iterator[EntryType]: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute( - "select word, definition from dictionary order by word", - ) - # iteration over self._cur stops after one entry - # and self._cur.fetchone() returns None - # no idea why! 
- # https://github.com/ilius/pyglossary/issues/282 - # for row in self._cur: - for row in self._cur.fetchall(): - word = html.unescape(row[0]) - definition = row[1] - yield self._glos.newEntry(word, definition, defiFormat="m") - - def close(self) -> None: - if self._cur: - self._cur.close() - if self._con: - self._con.close() - self._clear() diff --git a/pyglossary/plugins/digitalnk/reader.py b/pyglossary/plugins/digitalnk/reader.py new file mode 100644 index 000000000..5eb2ba373 --- /dev/null +++ b/pyglossary/plugins/digitalnk/reader.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import html +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import sqlite3 + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + self._con = connect(filename) + self._cur = self._con.cursor() + self._glos.setDefaultDefiFormat("m") + + def __len__(self) -> int: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute("select count(*) from dictionary") + return self._cur.fetchone()[0] + + def __iter__(self) -> Iterator[EntryType]: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute( + "select word, definition from dictionary order by word", + ) + # iteration over self._cur stops after one entry + # and self._cur.fetchone() returns None + # no idea why! + # https://github.com/ilius/pyglossary/issues/282 + # for row in self._cur: + for row in self._cur.fetchall(): + word = html.unescape(row[0]) + definition = row[1] + yield self._glos.newEntry(word, definition, defiFormat="m") + + def close(self) -> None: + if self._cur: + self._cur.close() + if self._con: + self._con.close() + self._clear() diff --git a/pyglossary/plugins/dikt_json/__init__.py b/pyglossary/plugins/dikt_json/__init__.py index e47315cd5..39eeecf74 100644 --- a/pyglossary/plugins/dikt_json/__init__.py +++ b/pyglossary/plugins/dikt_json/__init__.py @@ -4,23 +4,13 @@ from __future__ import annotations -import re -from typing import TYPE_CHECKING - -from pyglossary.compression import ( - # compressionOpen, - stdCompressions, -) from pyglossary.option import ( BoolOption, EncodingOption, Option, ) -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .writer import Writer __all__ = [ "Writer", @@ -55,65 +45,3 @@ comment="add headwords title to beginning of definition", ), } - - -class Writer: - _encoding: str = "utf-8" - _enable_info: bool = True - _resources: bool = True - _word_title: bool = False - - compressions = stdCompressions - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = None - glos.preventDuplicateWords() - - def open(self, filename: str) -> None: - self._filename = filename - - def finish(self) -> None: - self._filename = None - - def write(self) -> Generator[None, EntryType, None]: - from json import dumps - - from pyglossary.text_writer import writeTxt - - glos = self._glos - encoding = self._encoding - enable_info = self._enable_info - resources = self._resources - - ensure_ascii = encoding == "ascii" - - def escape(st: str) -> str: - # 
remove styling from HTML tags - st2 = re.sub(r' style="[^"]*"', "", st) - st2 = re.sub(r' class="[^"]*"', "", st2) - st2 = re.sub(r"]*>", "", st2) - st2 = st2.replace("", "") - st2 = re.sub(r"\n", "", st2) - st2 = st2.replace("
              ", "") - st2 = st2.replace("", "") - # fix russian dictionary issues, - # such as hyphenation in word (e.g. абб{[']}а{[/']}т) - st2 = re.sub(r"\{\['\]\}", "", st2) - st2 = re.sub(r"\{\[/'\]\}", "", st2) - return dumps(st2, ensure_ascii=ensure_ascii) - - yield from writeTxt( - glos, - entryFmt="\t{word}: {defi},\n", - filename=self._filename, - encoding=encoding, - writeInfo=enable_info, - wordEscapeFunc=escape, - defiEscapeFunc=escape, - ext=".json", - head="{\n", - tail='\t"": ""\n}', - resources=resources, - word_title=self._word_title, - ) diff --git a/pyglossary/plugins/dikt_json/writer.py b/pyglossary/plugins/dikt_json/writer.py new file mode 100644 index 000000000..e7827ae4b --- /dev/null +++ b/pyglossary/plugins/dikt_json/writer.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# mypy: ignore-errors +# from https://github.com/maxim-saplin/pyglossary + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +from pyglossary.compression import ( + # compressionOpen, + stdCompressions, +) + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + _encoding: str = "utf-8" + _enable_info: bool = True + _resources: bool = True + _word_title: bool = False + + compressions = stdCompressions + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = None + glos.preventDuplicateWords() + + def open(self, filename: str) -> None: + self._filename = filename + + def finish(self) -> None: + self._filename = None + + def write(self) -> Generator[None, EntryType, None]: + from json import dumps + + from pyglossary.text_writer import writeTxt + + glos = self._glos + encoding = self._encoding + enable_info = self._enable_info + resources = self._resources + + ensure_ascii = encoding == "ascii" + + def escape(st: str) -> str: + # remove styling from HTML tags + st2 = re.sub(r' style="[^"]*"', "", st) + st2 = re.sub(r' class="[^"]*"', "", st2) + st2 = re.sub(r"]*>", "", st2) + st2 = st2.replace("", "") + st2 = re.sub(r"\n", "", st2) + st2 = st2.replace("
              ", "") + st2 = st2.replace("", "") + # fix russian dictionary issues, + # such as hyphenation in word (e.g. абб{[']}а{[/']}т) + st2 = re.sub(r"\{\['\]\}", "", st2) + st2 = re.sub(r"\{\[/'\]\}", "", st2) + return dumps(st2, ensure_ascii=ensure_ascii) + + yield from writeTxt( + glos, + entryFmt="\t{word}: {defi},\n", + filename=self._filename, + encoding=encoding, + writeInfo=enable_info, + wordEscapeFunc=escape, + defiEscapeFunc=escape, + ext=".json", + head="{\n", + tail='\t"": ""\n}', + resources=resources, + word_title=self._word_title, + ) diff --git a/pyglossary/plugins/ebook_epub2/__init__.py b/pyglossary/plugins/ebook_epub2/__init__.py index 8bf34801b..baabf0036 100644 --- a/pyglossary/plugins/ebook_epub2/__init__.py +++ b/pyglossary/plugins/ebook_epub2/__init__.py @@ -1,27 +1,7 @@ # -*- coding: utf-8 -*- -# The MIT License (MIT) -# Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) -# Copyright © 2016-2019 Saeed Rasooli -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-from __future__ import annotations -from typing import TYPE_CHECKING, Any +from __future__ import annotations -from pyglossary.ebook_base import EbookWriter from pyglossary.flags import ALWAYS from pyglossary.option import ( BoolOption, @@ -30,8 +10,7 @@ StrOption, ) -if TYPE_CHECKING: - from pyglossary.glossary_types import GlossaryType +from .writer import Writer __all__ = [ "Writer", @@ -85,209 +64,3 @@ comment="Path to cover file", ), } - - -class Writer(EbookWriter): - # these class attrs are only in Epub - # MIMETYPE_CONTENTS, CONTAINER_XML_CONTENTS - # NCX_TEMPLATE, NCX_NAVPOINT_TEMPLATE - - MIMETYPE_CONTENTS = "application/epub+zip" - CONTAINER_XML_CONTENTS = """ - - - - -""" - - NCX_TEMPLATE = """ - - - - - - - - - - {title} - - -{ncx_items} - -""" - - NCX_NAVPOINT_TEMPLATE = """\t - - {text} - - - """ - - CSS_CONTENTS = b"""@charset "UTF-8"; -body { - margin: 10px 25px 10px 25px; -} -h1 { - font-size: 200%; -} -h2 { - font-size: 150%; -} -p { - margin-left: 0em; - margin-right: 0em; - margin-top: 0em; - margin-bottom: 0em; - line-height: 2em; - text-align: justify; -} -a, a:focus, a:active, a:visited { - color: black; - text-decoration: none; -} -body.indexPage {} -h1.indexTitle {} -p.indexGroups { - font-size: 150%; -} -span.indexGroup {} -body.groupPage {} -h1.groupTitle {} -div.groupNavigation {} -span.groupHeadword {} -div.groupEntry { - margin-top: 0; - margin-bottom: 1em; -} -h2.groupHeadword { - margin-left: 5%; -} -p.groupDefinition { - margin-left: 10%; - margin-right: 10%; -} -""" - - GROUP_XHTML_TEMPLATE = """ - - - - {title} - - - -

              {group_title}

              -
              - [ Previous ] -{index_link} - [ Next ] -
              -{group_contents} - -""" - GROUP_XHTML_INDEX_LINK = '\t\t[ Index ]' - - GROUP_XHTML_WORD_DEFINITION_TEMPLATE = """\t
              -

              {headword}

              -

              {definition}

              -
              """ - - OPF_TEMPLATE = """ - - - {identifier} - {sourceLang} - {title} - {creator} - {copyright} - {creationDate} - {cover} - - -{manifest} - - -{spine} - -""" - - COVER_TEMPLATE = '' - - def __init__(self, glos: GlossaryType) -> None: - import uuid - - EbookWriter.__init__( - self, - glos, - ) - glos.setInfo("uuid", str(uuid.uuid4()).replace("-", "")) - - @classmethod - def cls_get_prefix( - cls: type[EbookWriter], - options: dict[str, Any], - word: str, - ) -> str: - if not word: - return "" - length = options.get("group_by_prefix_length", cls._group_by_prefix_length) - prefix = word[:length].lower() - if prefix[0] < "a": - return "SPECIAL" - return prefix - - def get_prefix(self, word: str) -> str: - if not word: - return "" - length = self._group_by_prefix_length - prefix = word[:length].lower() - if prefix[0] < "a": - return "SPECIAL" - return prefix - - def write_ncx(self, group_labels: list[str]) -> None: - """ - write_ncx - only for epub. - """ - ncx_items: list[str] = [] - index = 1 - if self._include_index_page: - ncx_items.append( - self.NCX_NAVPOINT_TEMPLATE.format( - index=index, - text="Index", - src="index.xhtml", - ), - ) - index += 1 - for group_label in group_labels: - ncx_items.append( - self.NCX_NAVPOINT_TEMPLATE.format( - index=index, - text=group_label, - src=self.get_group_xhtml_file_name_from_index(index), - ), - ) - index += 1 - ncx_items_unicode = "\n".join(ncx_items) - ncx_contents = self.NCX_TEMPLATE.format( - identifier=self._glos.getInfo("uuid"), - title=self._glos.getInfo("name"), - ncx_items=ncx_items_unicode, - ).encode("utf-8") - self.add_file_manifest( - "OEBPS/toc.ncx", - "toc.ncx", - ncx_contents, - "application/x-dtbncx+xml", - ) - - # inherits write from EbookWriter diff --git a/pyglossary/plugins/ebook_epub2/writer.py b/pyglossary/plugins/ebook_epub2/writer.py new file mode 100644 index 000000000..eba888c33 --- /dev/null +++ b/pyglossary/plugins/ebook_epub2/writer.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +# The MIT License (MIT) +# Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) +# Copyright © 2016-2019 Saeed Rasooli +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from pyglossary.ebook_base import EbookWriter + +if TYPE_CHECKING: + from pyglossary.glossary_types import GlossaryType + + +class Writer(EbookWriter): + # these class attrs are only in Epub + # MIMETYPE_CONTENTS, CONTAINER_XML_CONTENTS + # NCX_TEMPLATE, NCX_NAVPOINT_TEMPLATE + + MIMETYPE_CONTENTS = "application/epub+zip" + CONTAINER_XML_CONTENTS = """ + + + + +""" + + NCX_TEMPLATE = """ + + + + + + + + + + {title} + + +{ncx_items} + +""" + + NCX_NAVPOINT_TEMPLATE = """\t + + {text} + + + """ + + CSS_CONTENTS = b"""@charset "UTF-8"; +body { + margin: 10px 25px 10px 25px; +} +h1 { + font-size: 200%; +} +h2 { + font-size: 150%; +} +p { + margin-left: 0em; + margin-right: 0em; + margin-top: 0em; + margin-bottom: 0em; + line-height: 2em; + text-align: justify; +} +a, a:focus, a:active, a:visited { + color: black; + text-decoration: none; +} +body.indexPage {} +h1.indexTitle {} +p.indexGroups { + font-size: 150%; +} +span.indexGroup {} +body.groupPage {} +h1.groupTitle {} +div.groupNavigation {} +span.groupHeadword {} +div.groupEntry { + margin-top: 0; + margin-bottom: 1em; +} +h2.groupHeadword { + margin-left: 5%; +} +p.groupDefinition { + margin-left: 10%; + margin-right: 10%; +} +""" + + GROUP_XHTML_TEMPLATE = """ + + + + {title} + + + +

              {group_title}

              +
              + [ Previous ] +{index_link} + [ Next ] +
              +{group_contents} + +""" + GROUP_XHTML_INDEX_LINK = '\t\t[ Index ]' + + GROUP_XHTML_WORD_DEFINITION_TEMPLATE = """\t
              +

              {headword}

              +

              {definition}

              +
              """ + + OPF_TEMPLATE = """ + + + {identifier} + {sourceLang} + {title} + {creator} + {copyright} + {creationDate} + {cover} + + +{manifest} + + +{spine} + +""" + + COVER_TEMPLATE = '' + + def __init__(self, glos: GlossaryType) -> None: + import uuid + + EbookWriter.__init__( + self, + glos, + ) + glos.setInfo("uuid", str(uuid.uuid4()).replace("-", "")) + + @classmethod + def cls_get_prefix( + cls: type[EbookWriter], + options: dict[str, Any], + word: str, + ) -> str: + if not word: + return "" + length = options.get("group_by_prefix_length", cls._group_by_prefix_length) + prefix = word[:length].lower() + if prefix[0] < "a": + return "SPECIAL" + return prefix + + def get_prefix(self, word: str) -> str: + if not word: + return "" + length = self._group_by_prefix_length + prefix = word[:length].lower() + if prefix[0] < "a": + return "SPECIAL" + return prefix + + def write_ncx(self, group_labels: list[str]) -> None: + """ + write_ncx + only for epub. + """ + ncx_items: list[str] = [] + index = 1 + if self._include_index_page: + ncx_items.append( + self.NCX_NAVPOINT_TEMPLATE.format( + index=index, + text="Index", + src="index.xhtml", + ), + ) + index += 1 + for group_label in group_labels: + ncx_items.append( + self.NCX_NAVPOINT_TEMPLATE.format( + index=index, + text=group_label, + src=self.get_group_xhtml_file_name_from_index(index), + ), + ) + index += 1 + ncx_items_unicode = "\n".join(ncx_items) + ncx_contents = self.NCX_TEMPLATE.format( + identifier=self._glos.getInfo("uuid"), + title=self._glos.getInfo("name"), + ncx_items=ncx_items_unicode, + ).encode("utf-8") + self.add_file_manifest( + "OEBPS/toc.ncx", + "toc.ncx", + ncx_contents, + "application/x-dtbncx+xml", + ) + + # inherits write from EbookWriter diff --git a/pyglossary/plugins/ebook_kobo/__init__.py b/pyglossary/plugins/ebook_kobo/__init__.py index 02a108f88..cbd9b6f90 100644 --- a/pyglossary/plugins/ebook_kobo/__init__.py +++ b/pyglossary/plugins/ebook_kobo/__init__.py @@ -1,41 +1,14 @@ # -*- coding: utf-8 -*- -# The MIT License (MIT) -# Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) -# Copyright © 2022 Saeed Rasooli -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
+ from __future__ import annotations -import re -import unicodedata -from gzip import compress, decompress -from operator import itemgetter -from pathlib import Path -from pickle import dumps, loads from typing import TYPE_CHECKING -from pyglossary import core -from pyglossary.core import exc_note, log, pip from pyglossary.flags import NEVER -from pyglossary.os_utils import indir -if TYPE_CHECKING: - from collections.abc import Generator +from .writer import Writer - from pyglossary.glossary_types import EntryType, GlossaryType +if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ @@ -75,199 +48,3 @@ # Penelope option: marisa_index_size=1000000 - - -def is_cyrillic_char(c: str) -> bool: - # U+0400 - U+04FF: Cyrillic - # U+0500 - U+052F: Cyrillic Supplement - if "\u0400" <= c <= "\u052f": - return True - - # U+2DE0 - U+2DFF: Cyrillic Extended-A - if "\u2de0" <= c <= "\u2dff": - return True - - # U+A640 - U+A69F: Cyrillic Extended-B - if "\ua640" <= c <= "\ua69f": - return True - - # U+1C80 - U+1C8F: Cyrillic Extended-C - if "\u1c80" <= c <= "\u1c8f": - return True - - # U+FE2E, U+FE2F: Combining Half Marks - # U+1D2B, U+1D78: Phonetic Extensions - return c in {"\ufe2e", "\ufe2f", "\u1d2b", "\u1d78"} - - -def fixFilename(fname: str) -> str: - return Path(fname.replace("/", "2F").replace("\\", "5C")).name - - -class Writer: - WORDS_FILE_NAME = "words" - - depends = { - "marisa_trie": "marisa-trie", - } - - @staticmethod - def stripFullHtmlError(entry: EntryType, error: str) -> None: - log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._words: list[str] = [] - self._img_pattern = re.compile( - ']*?)?>', - re.DOTALL, - ) - # img tag has no closing - glos.stripFullHtml(errorHandler=self.stripFullHtmlError) - - def get_prefix(self, word: str) -> str: # noqa: PLR6301 - if not word: - return "11" - wo = word[:2].strip().lower() - if not wo: - return "11" - if wo[0] == "\x00": - return "11" - if len(wo) > 1 and wo[1] == "\x00": - wo = wo[:1] - if is_cyrillic_char(wo[0]): - return wo - # if either of the first 2 chars are not unicode letters, return "11" - for c in wo: - if not unicodedata.category(c).startswith("L"): - return "11" - return wo.ljust(2, "a") - - def fix_defi(self, defi: str) -> str: - # @pgaskin on #219: Kobo supports images in dictionaries, - # but these have a lot of gotchas - # (see https://pgaskin.net/dictutil/dicthtml/format.html). 
- # Basically, The best way to do it is to encode the images as a - # base64 data URL after shrinking it and making it grayscale - # (if it's JPG, this is as simple as only keeping the Y channel) - - # for now we just skip data entries and remove ' Generator[None, EntryType, None]: - import gzip - - dataEntryCount = 0 - - htmlHeader = '\n' - - groupCounter = 0 - htmlContents = htmlHeader - - def writeGroup(lastPrefix: str) -> None: - nonlocal htmlContents - group_fname = fixFilename(lastPrefix) - htmlContents += "" - core.trace( - log, - f"writeGroup: {lastPrefix!r}, " - f"{group_fname!r}, count={groupCounter}", - ) - with gzip.open(group_fname + ".html", mode="wb") as gzipFile: - gzipFile.write(htmlContents.encode("utf-8")) - htmlContents = htmlHeader - - allWords: list[str] = [] - # TODO: switch to SQLite, like StarDict writer - data: list[tuple[str, bytes]] = [] - - while True: - entry = yield - if entry is None: - break - if entry.isData(): - dataEntryCount += 1 - continue - l_word = entry.l_word - allWords += l_word - wordsByPrefix: dict[str, list[str]] = {} - for word in l_word: - prefix = self.get_prefix(word) - if prefix in wordsByPrefix: - wordsByPrefix[prefix].append(word) - else: - wordsByPrefix[prefix] = [word] - defi = self.fix_defi(entry.defi) - mainHeadword = l_word[0] - for prefix, p_words in wordsByPrefix.items(): - headword, *variants = p_words - if headword != mainHeadword: - headword = f"{mainHeadword}, {headword}" - data.append( - ( - prefix, - compress( - dumps( - ( - headword, - variants, - defi, - ), - ), - ), - ), - ) - del entry - - log.info("Kobo: sorting entries...") - data.sort(key=itemgetter(0)) - - log.info("Kobo: writing entries...") - - lastPrefix = "" - for prefix, row in data: - headword, variants, defi = loads(decompress(row)) - if lastPrefix and prefix != lastPrefix: - writeGroup(lastPrefix) - groupCounter = 0 - lastPrefix = prefix - - htmlVariants = "".join( - f'' for v in variants - ) - body = f"
              {headword}{htmlVariants}
              {defi}
              " - htmlContents += f'{body}\n' - groupCounter += 1 - del data - - if groupCounter > 0: - writeGroup(lastPrefix) - - if dataEntryCount > 0: - log.warning( - f"ignored {dataEntryCount} files (data entries)" - " and replaced ' None: - try: - import marisa_trie # type: ignore # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install marisa-trie` to install") - raise - self._filename = filename - - def write(self) -> Generator[None, EntryType, None]: - with indir(self._filename, create=True): - yield from self.write_groups() - - def finish(self) -> None: - import marisa_trie - - with indir(self._filename, create=False): - trie = marisa_trie.Trie(self._words) - trie.save(self.WORDS_FILE_NAME) - self._filename = "" diff --git a/pyglossary/plugins/ebook_kobo/writer.py b/pyglossary/plugins/ebook_kobo/writer.py new file mode 100644 index 000000000..5b26aff01 --- /dev/null +++ b/pyglossary/plugins/ebook_kobo/writer.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +# The MIT License (MIT) +# Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) +# Copyright © 2022 Saeed Rasooli +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+from __future__ import annotations + +import re +import unicodedata +from gzip import compress, decompress +from operator import itemgetter +from pathlib import Path +from pickle import dumps, loads +from typing import TYPE_CHECKING + +from pyglossary import core +from pyglossary.core import exc_note, log, pip +from pyglossary.os_utils import indir + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +def is_cyrillic_char(c: str) -> bool: + # U+0400 - U+04FF: Cyrillic + # U+0500 - U+052F: Cyrillic Supplement + if "\u0400" <= c <= "\u052f": + return True + + # U+2DE0 - U+2DFF: Cyrillic Extended-A + if "\u2de0" <= c <= "\u2dff": + return True + + # U+A640 - U+A69F: Cyrillic Extended-B + if "\ua640" <= c <= "\ua69f": + return True + + # U+1C80 - U+1C8F: Cyrillic Extended-C + if "\u1c80" <= c <= "\u1c8f": + return True + + # U+FE2E, U+FE2F: Combining Half Marks + # U+1D2B, U+1D78: Phonetic Extensions + return c in {"\ufe2e", "\ufe2f", "\u1d2b", "\u1d78"} + + +def fixFilename(fname: str) -> str: + return Path(fname.replace("/", "2F").replace("\\", "5C")).name + + +class Writer: + WORDS_FILE_NAME = "words" + + depends = { + "marisa_trie": "marisa-trie", + } + + @staticmethod + def stripFullHtmlError(entry: EntryType, error: str) -> None: + log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._words: list[str] = [] + self._img_pattern = re.compile( + ']*?)?>', + re.DOTALL, + ) + # img tag has no closing + glos.stripFullHtml(errorHandler=self.stripFullHtmlError) + + def get_prefix(self, word: str) -> str: # noqa: PLR6301 + if not word: + return "11" + wo = word[:2].strip().lower() + if not wo: + return "11" + if wo[0] == "\x00": + return "11" + if len(wo) > 1 and wo[1] == "\x00": + wo = wo[:1] + if is_cyrillic_char(wo[0]): + return wo + # if either of the first 2 chars are not unicode letters, return "11" + for c in wo: + if not unicodedata.category(c).startswith("L"): + return "11" + return wo.ljust(2, "a") + + def fix_defi(self, defi: str) -> str: + # @pgaskin on #219: Kobo supports images in dictionaries, + # but these have a lot of gotchas + # (see https://pgaskin.net/dictutil/dicthtml/format.html). 
+ # Basically, The best way to do it is to encode the images as a + # base64 data URL after shrinking it and making it grayscale + # (if it's JPG, this is as simple as only keeping the Y channel) + + # for now we just skip data entries and remove ' Generator[None, EntryType, None]: + import gzip + + dataEntryCount = 0 + + htmlHeader = '\n' + + groupCounter = 0 + htmlContents = htmlHeader + + def writeGroup(lastPrefix: str) -> None: + nonlocal htmlContents + group_fname = fixFilename(lastPrefix) + htmlContents += "" + core.trace( + log, + f"writeGroup: {lastPrefix!r}, " + f"{group_fname!r}, count={groupCounter}", + ) + with gzip.open(group_fname + ".html", mode="wb") as gzipFile: + gzipFile.write(htmlContents.encode("utf-8")) + htmlContents = htmlHeader + + allWords: list[str] = [] + # TODO: switch to SQLite, like StarDict writer + data: list[tuple[str, bytes]] = [] + + while True: + entry = yield + if entry is None: + break + if entry.isData(): + dataEntryCount += 1 + continue + l_word = entry.l_word + allWords += l_word + wordsByPrefix: dict[str, list[str]] = {} + for word in l_word: + prefix = self.get_prefix(word) + if prefix in wordsByPrefix: + wordsByPrefix[prefix].append(word) + else: + wordsByPrefix[prefix] = [word] + defi = self.fix_defi(entry.defi) + mainHeadword = l_word[0] + for prefix, p_words in wordsByPrefix.items(): + headword, *variants = p_words + if headword != mainHeadword: + headword = f"{mainHeadword}, {headword}" + data.append( + ( + prefix, + compress( + dumps( + ( + headword, + variants, + defi, + ), + ), + ), + ), + ) + del entry + + log.info("Kobo: sorting entries...") + data.sort(key=itemgetter(0)) + + log.info("Kobo: writing entries...") + + lastPrefix = "" + for prefix, row in data: + headword, variants, defi = loads(decompress(row)) + if lastPrefix and prefix != lastPrefix: + writeGroup(lastPrefix) + groupCounter = 0 + lastPrefix = prefix + + htmlVariants = "".join( + f'' for v in variants + ) + body = f"
              {headword}{htmlVariants}
              {defi}
              " + htmlContents += f'
              {body}\n' + groupCounter += 1 + del data + + if groupCounter > 0: + writeGroup(lastPrefix) + + if dataEntryCount > 0: + log.warning( + f"ignored {dataEntryCount} files (data entries)" + " and replaced ' None: + try: + import marisa_trie # type: ignore # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install marisa-trie` to install") + raise + self._filename = filename + + def write(self) -> Generator[None, EntryType, None]: + with indir(self._filename, create=True): + yield from self.write_groups() + + def finish(self) -> None: + import marisa_trie + + with indir(self._filename, create=False): + trie = marisa_trie.Trie(self._words) + trie.save(self.WORDS_FILE_NAME) + self._filename = "" diff --git a/pyglossary/plugins/ebook_kobo_dictfile/__init__.py b/pyglossary/plugins/ebook_kobo_dictfile/__init__.py index 7ec327ee3..946b18dfd 100644 --- a/pyglossary/plugins/ebook_kobo_dictfile/__init__.py +++ b/pyglossary/plugins/ebook_kobo_dictfile/__init__.py @@ -1,42 +1,15 @@ # -*- coding: utf-8 -*- -# The MIT License (MIT) -# Copyright © 2020-2021 Saeed Rasooli -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-from __future__ import annotations -import os -from os.path import isdir -from typing import TYPE_CHECKING +from __future__ import annotations -from pyglossary.core import exc_note, log, pip -from pyglossary.image_utils import extractInlineHtmlImages -from pyglossary.io_utils import nullTextIO from pyglossary.option import ( BoolOption, EncodingOption, Option, ) -from pyglossary.text_reader import TextGlossaryReader -if TYPE_CHECKING: - import io - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -73,155 +46,3 @@ "encoding": EncodingOption(), "extract_inline_images": BoolOption(comment="Extract inline images"), } - - -def fixWord(word: str) -> str: - return word.replace("\n", " ") - - -def escapeDefi(defi: str) -> str: - return defi.replace("\n@", "\n @").replace("\n:", "\n :").replace("\n&", "\n &") - - -class Reader(TextGlossaryReader): - depends = { - "mistune": "mistune==3.0.1", - } - - _extract_inline_images: bool = True - - def __init__(self, glos: GlossaryType) -> None: - TextGlossaryReader.__init__(self, glos, hasInfo=False) - - def open(self, filename: str) -> None: - try: - import mistune # type: ignore # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install mistune` to install") - raise - TextGlossaryReader.open(self, filename) - self._glos.setDefaultDefiFormat("h") - - @classmethod - def isInfoWord(cls, _word: str) -> bool: - return False - - @classmethod - def fixInfoWord(cls, _word: str) -> str: - raise NotImplementedError - - def fixDefi( - self, - defi: str, - html: bool, - ) -> tuple[str, list[tuple[str, str]] | None]: - import mistune - - defi = ( - defi.replace("\n @", "\n@") - .replace("\n :", "\n:") - .replace("\n &", "\n&") - .replace("


              ", "

              ") - .replace("


              ", "

              ") - .replace("


              ", "

              ") - ) - defi = defi.strip() - if html: - pass - else: - defi = mistune.html(defi) - images: list[tuple[str, str]] | None = None - if self._extract_inline_images: - defi, images = extractInlineHtmlImages( - defi, - self._glos.tmpDataDir, - fnamePrefix="", # maybe f"{self._pos:06d}-" - ) - return defi, images - - def nextBlock( - self, - ) -> tuple[list[str], str, list[tuple[str, str]] | None]: - words: list[str] = [] - defiLines: list[str] = [] - html = False - - while True: - line = self.readline() - if not line: - break - line = line.rstrip("\n\r") - if line.startswith("@"): - if words: - self._bufferLine = line - defi, images = self.fixDefi("\n".join(defiLines), html=html) - return words, defi, images - words = [line[1:].strip()] - continue - if line.startswith(": "): - defiLines.append(line[2:]) - continue - if line.startswith("::"): - continue - if line.startswith("&"): - words.append(line[1:].strip()) - continue - if line.startswith(""): - line = line[6:] - html = True - defiLines.append(line) - - if words: - defi, images = self.fixDefi("\n".join(defiLines), html=html) - return words, defi, images - - raise StopIteration - - -class Writer: - _encoding: str = "utf-8" - - @staticmethod - def stripFullHtmlError(entry: EntryType, error: str) -> None: - log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._file: io.TextIOBase = nullTextIO - glos.stripFullHtml(errorHandler=self.stripFullHtmlError) - - def finish(self) -> None: - self._file.close() - if not os.listdir(self._resDir): - os.rmdir(self._resDir) - - def open(self, filename: str) -> None: - self._file = open(filename, "w", encoding=self._encoding) - # dictgen's ParseDictFile does not seem to support glossary info / metedata - self._resDir = filename + "_res" - if not isdir(self._resDir): - os.mkdir(self._resDir) - - def write( - self, - ) -> Generator[None, EntryType, None]: - fileObj = self._file - resDir = self._resDir - while True: - entry = yield - if entry is None: - break - if entry.isData(): - entry.save(resDir) - continue - words = entry.l_word - defi = entry.defi - - entry.detectDefiFormat() - if entry.defiFormat == "h": - defi = f"{entry.defi}" - - fileObj.write(f"@ {fixWord(words[0])}\n") - for alt in words[1:]: - fileObj.write(f"& {fixWord(alt)}\n") - fileObj.write(f"{escapeDefi(defi)}\n\n") diff --git a/pyglossary/plugins/ebook_kobo_dictfile/reader.py b/pyglossary/plugins/ebook_kobo_dictfile/reader.py new file mode 100644 index 000000000..131ab6190 --- /dev/null +++ b/pyglossary/plugins/ebook_kobo_dictfile/reader.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +# The MIT License (MIT) +# Copyright © 2020-2021 Saeed Rasooli +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyglossary.core import exc_note, pip +from pyglossary.image_utils import extractInlineHtmlImages +from pyglossary.text_reader import TextGlossaryReader + +if TYPE_CHECKING: + from pyglossary.glossary_types import GlossaryType + + +class Reader(TextGlossaryReader): + depends = { + "mistune": "mistune==3.0.1", + } + + _extract_inline_images: bool = True + + def __init__(self, glos: GlossaryType) -> None: + TextGlossaryReader.__init__(self, glos, hasInfo=False) + + def open(self, filename: str) -> None: + try: + import mistune # type: ignore # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install mistune` to install") + raise + TextGlossaryReader.open(self, filename) + self._glos.setDefaultDefiFormat("h") + + @classmethod + def isInfoWord(cls, _word: str) -> bool: + return False + + @classmethod + def fixInfoWord(cls, _word: str) -> str: + raise NotImplementedError + + def fixDefi( + self, + defi: str, + html: bool, + ) -> tuple[str, list[tuple[str, str]] | None]: + import mistune + + defi = ( + defi.replace("\n @", "\n@") + .replace("\n :", "\n:") + .replace("\n &", "\n&") + .replace("


              ", "

              ") + .replace("


              ", "

              ") + .replace("


              ", "

              ") + ) + defi = defi.strip() + if html: + pass + else: + defi = mistune.html(defi) + images: list[tuple[str, str]] | None = None + if self._extract_inline_images: + defi, images = extractInlineHtmlImages( + defi, + self._glos.tmpDataDir, + fnamePrefix="", # maybe f"{self._pos:06d}-" + ) + return defi, images + + def nextBlock( + self, + ) -> tuple[list[str], str, list[tuple[str, str]] | None]: + words: list[str] = [] + defiLines: list[str] = [] + html = False + + while True: + line = self.readline() + if not line: + break + line = line.rstrip("\n\r") + if line.startswith("@"): + if words: + self._bufferLine = line + defi, images = self.fixDefi("\n".join(defiLines), html=html) + return words, defi, images + words = [line[1:].strip()] + continue + if line.startswith(": "): + defiLines.append(line[2:]) + continue + if line.startswith("::"): + continue + if line.startswith("&"): + words.append(line[1:].strip()) + continue + if line.startswith(""): + line = line[6:] + html = True + defiLines.append(line) + + if words: + defi, images = self.fixDefi("\n".join(defiLines), html=html) + return words, defi, images + + raise StopIteration diff --git a/pyglossary/plugins/ebook_kobo_dictfile/writer.py b/pyglossary/plugins/ebook_kobo_dictfile/writer.py new file mode 100644 index 000000000..60c9c9651 --- /dev/null +++ b/pyglossary/plugins/ebook_kobo_dictfile/writer.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# The MIT License (MIT) +# Copyright © 2020-2021 Saeed Rasooli +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+from __future__ import annotations + +import os +from os.path import isdir +from typing import TYPE_CHECKING + +from pyglossary.core import log +from pyglossary.io_utils import nullTextIO + +if TYPE_CHECKING: + import io + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +def fixWord(word: str) -> str: + return word.replace("\n", " ") + + +def escapeDefi(defi: str) -> str: + return defi.replace("\n@", "\n @").replace("\n:", "\n :").replace("\n&", "\n &") + + +class Writer: + _encoding: str = "utf-8" + + @staticmethod + def stripFullHtmlError(entry: EntryType, error: str) -> None: + log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._file: io.TextIOBase = nullTextIO + glos.stripFullHtml(errorHandler=self.stripFullHtmlError) + + def finish(self) -> None: + self._file.close() + if not os.listdir(self._resDir): + os.rmdir(self._resDir) + + def open(self, filename: str) -> None: + self._file = open(filename, "w", encoding=self._encoding) + # dictgen's ParseDictFile does not seem to support glossary info / metedata + self._resDir = filename + "_res" + if not isdir(self._resDir): + os.mkdir(self._resDir) + + def write( + self, + ) -> Generator[None, EntryType, None]: + fileObj = self._file + resDir = self._resDir + while True: + entry = yield + if entry is None: + break + if entry.isData(): + entry.save(resDir) + continue + words = entry.l_word + defi = entry.defi + + entry.detectDefiFormat() + if entry.defiFormat == "h": + defi = f"{entry.defi}" + + fileObj.write(f"@ {fixWord(words[0])}\n") + for alt in words[1:]: + fileObj.write(f"& {fixWord(alt)}\n") + fileObj.write(f"{escapeDefi(defi)}\n\n") diff --git a/pyglossary/plugins/ebook_mobi/__init__.py b/pyglossary/plugins/ebook_mobi/__init__.py index 9ac4e18ec..00da1e1ad 100644 --- a/pyglossary/plugins/ebook_mobi/__init__.py +++ b/pyglossary/plugins/ebook_mobi/__init__.py @@ -1,33 +1,8 @@ # -*- coding: utf-8 -*- -# The MIT License (MIT) -# Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) -# Copyright © 2016-2022 Saeed Rasooli -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-from __future__ import annotations -import os -from datetime import datetime -from os.path import join, split -from typing import TYPE_CHECKING +from __future__ import annotations -from pyglossary.core import log -from pyglossary.ebook_base import EbookWriter from pyglossary.flags import DEFAULT_YES -from pyglossary.langs import Lang from pyglossary.option import ( BoolOption, FileSizeOption, @@ -36,10 +11,7 @@ StrOption, ) -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .writer import Writer __all__ = [ "Writer", @@ -121,277 +93,3 @@ " for creating Mobipocket e-books.", ), ] - - -class GroupStateBySize: - def __init__(self, writer: Writer) -> None: - self.writer = writer - self.group_index = -1 - self.reset() - - def reset(self) -> None: - self.group_contents: list[str] = [] - self.group_size = 0 - - def add(self, entry: EntryType) -> None: - defi = entry.defi - content = self.writer.format_group_content( - entry.l_word[0], - defi, - variants=entry.l_word[1:], - ) - self.group_contents.append(content) - self.group_size += len(content.encode("utf-8")) - - -class Writer(EbookWriter): - _compress: bool = False - _keep: bool = False - _kindlegen_path: str = "" - _file_size_approx: int = 271360 - _hide_word_index: bool = False - _spellcheck: bool = True - _exact: bool = False - CSS_CONTENTS = b""""@charset "UTF-8";""" - GROUP_XHTML_TEMPLATE = """ - - - - - - - - -{group_contents} - - -""" - - GROUP_XHTML_WORD_DEFINITION_TEMPLATE = """ -{headword_visible}{infl} - -
              {definition} -
              -
              """ - - GROUP_XHTML_WORD_INFL_TEMPLATE = """ -{iforms_str} -""" - - GROUP_XHTML_WORD_IFORM_TEMPLATE = """""" - - OPF_TEMPLATE = """ - - - -{title} -{sourceLang} -{identifier} -{creator} -{copyright} -{description} -Dictionaries - - - -{sourceLang} -{targetLang} -{cover} - - - -{manifest} - - -{spine} - - - -""" - - def __init__(self, glos: GlossaryType) -> None: - import uuid - - EbookWriter.__init__( - self, - glos, - ) - glos.setInfo("uuid", str(uuid.uuid4()).replace("-", "")) - # FIXME: check if full html pages/documents as entry do work - # glos.stripFullHtml(errorHandler=None) - - def get_prefix(self, word: str) -> str: - if not word: - return "" - length = self._group_by_prefix_length - prefix = word[:length].lower() - if prefix[0] < "a": - return "SPECIAL" - return prefix - - def format_group_content( - self, - word: str, - defi: str, - variants: list[str] | None = None, - ) -> str: - hide_word_index = self._hide_word_index - infl = "" - if variants: - iforms_list = [ - self.GROUP_XHTML_WORD_IFORM_TEMPLATE.format( - inflword=variant, - exact_str=' exact="yes"' if self._exact else "", - ) - for variant in variants - ] - infl = "\n" + self.GROUP_XHTML_WORD_INFL_TEMPLATE.format( - iforms_str="\n".join(iforms_list), - ) - - headword = self.escape_if_needed(word) - - defi = self.escape_if_needed(defi) - - if hide_word_index: - headword_visible = "" - value_headword = f' value="{headword}"' - else: - headword_visible = "\n" + self._glos.wordTitleStr(headword) - value_headword = "" - - return self.GROUP_XHTML_WORD_DEFINITION_TEMPLATE.format( - spellcheck_str=' spell="yes"' if self._spellcheck else "", - headword_visible=headword_visible, - value_headword=value_headword, - definition=defi, - infl=infl, - ) - - @staticmethod - def getLangCode(lang: Lang | None) -> str: - return lang.code if isinstance(lang, Lang) else "" - - def get_opf_contents( - self, - manifest_contents: str, - spine_contents: str, - ) -> bytes: - cover = "" - if self.cover: - cover = self.COVER_TEMPLATE.format(cover=self.cover) - creationDate = datetime.now().strftime("%Y-%m-%d") - - return self.OPF_TEMPLATE.format( - identifier=self._glos.getInfo("uuid"), - # use Language code instead name for kindlegen - sourceLang=self.getLangCode(self._glos.sourceLang), - targetLang=self.getLangCode(self._glos.targetLang), - title=self._glos.getInfo("name"), - creator=self._glos.author, - copyright=self._glos.getInfo("copyright"), - description=self._glos.getInfo("description"), - creationDate=creationDate, - cover=cover, - manifest=manifest_contents, - spine=spine_contents, - ).encode("utf-8") - - def write_groups(self) -> Generator[None, EntryType, None]: - def add_group(state: GroupStateBySize) -> None: - if state.group_size <= 0: - return - state.group_index += 1 - index = state.group_index + self.GROUP_START_INDEX - group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) - self.add_file_manifest( - "OEBPS/" + group_xhtml_path, - group_xhtml_path, - self.GROUP_XHTML_TEMPLATE.format( - group_contents=self.GROUP_XHTML_WORD_DEFINITION_JOINER.join( - state.group_contents, - ), - ).encode("utf-8"), - "application/xhtml+xml", - ) - - state = GroupStateBySize(self) - while True: - entry = yield - if entry is None: - break - if entry.isData(): - continue - - if state.group_size >= self._file_size_approx: - add_group(state) - state.reset() - - state.add(entry) - - add_group(state) - - def write(self) -> Generator[None, EntryType, None]: - import shutil - import subprocess - - filename = self._filename - 
kindlegen_path = self._kindlegen_path - - yield from EbookWriter.write(self) - - # download kindlegen from this page: - # https://www.amazon.com/gp/feature.html?ie=UTF8&docId=1000765211 - - # run kindlegen - if not kindlegen_path: - kindlegen_path = shutil.which("kindlegen") or "" - if not kindlegen_path: - log.warning( - f"Not running kindlegen, the raw files are located in {filename}", - ) - log.warning( - "Provide KindleGen path with: --write-options 'kindlegen_path=...'", - ) - return - - # name = self._glos.getInfo("name") - log.info(f"Creating .mobi file with kindlegen, using {kindlegen_path!r}") - direc, filename = split(filename) - cmd = [ - kindlegen_path, - join(filename, "OEBPS", "content.opf"), - "-gen_ff_mobi7", - "-o", - "content.mobi", - ] - proc = subprocess.Popen( - cmd, - cwd=direc, - stdout=subprocess.PIPE, - stdin=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - output = proc.communicate() - log.info(output[0].decode("utf-8")) - mobi_path_abs = os.path.join(filename, "OEBPS", "content.mobi") - log.info(f"Created .mobi file with kindlegen: {mobi_path_abs}") diff --git a/pyglossary/plugins/ebook_mobi/writer.py b/pyglossary/plugins/ebook_mobi/writer.py new file mode 100644 index 000000000..36484ff8e --- /dev/null +++ b/pyglossary/plugins/ebook_mobi/writer.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +# The MIT License (MIT) +# Copyright © 2012-2016 Alberto Pettarin (alberto@albertopettarin.it) +# Copyright © 2016-2022 Saeed Rasooli +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+from __future__ import annotations + +import os +from datetime import datetime +from os.path import join, split +from typing import TYPE_CHECKING + +from pyglossary.core import log +from pyglossary.ebook_base import EbookWriter +from pyglossary.langs import Lang + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class GroupStateBySize: + def __init__(self, writer: Writer) -> None: + self.writer = writer + self.group_index = -1 + self.reset() + + def reset(self) -> None: + self.group_contents: list[str] = [] + self.group_size = 0 + + def add(self, entry: EntryType) -> None: + defi = entry.defi + content = self.writer.format_group_content( + entry.l_word[0], + defi, + variants=entry.l_word[1:], + ) + self.group_contents.append(content) + self.group_size += len(content.encode("utf-8")) + + +class Writer(EbookWriter): + _compress: bool = False + _keep: bool = False + _kindlegen_path: str = "" + _file_size_approx: int = 271360 + _hide_word_index: bool = False + _spellcheck: bool = True + _exact: bool = False + CSS_CONTENTS = b""""@charset "UTF-8";""" + GROUP_XHTML_TEMPLATE = """ + + + + + + + + +{group_contents} + + +""" + + GROUP_XHTML_WORD_DEFINITION_TEMPLATE = """ +{headword_visible}{infl} + +
              {definition} +
              +
              """ + + GROUP_XHTML_WORD_INFL_TEMPLATE = """ +{iforms_str} +""" + + GROUP_XHTML_WORD_IFORM_TEMPLATE = """""" + + OPF_TEMPLATE = """ + + + +{title} +{sourceLang} +{identifier} +{creator} +{copyright} +{description} +Dictionaries + + + +{sourceLang} +{targetLang} +{cover} + + + +{manifest} + + +{spine} + + + +""" + + def __init__(self, glos: GlossaryType) -> None: + import uuid + + EbookWriter.__init__( + self, + glos, + ) + glos.setInfo("uuid", str(uuid.uuid4()).replace("-", "")) + # FIXME: check if full html pages/documents as entry do work + # glos.stripFullHtml(errorHandler=None) + + def get_prefix(self, word: str) -> str: + if not word: + return "" + length = self._group_by_prefix_length + prefix = word[:length].lower() + if prefix[0] < "a": + return "SPECIAL" + return prefix + + def format_group_content( + self, + word: str, + defi: str, + variants: list[str] | None = None, + ) -> str: + hide_word_index = self._hide_word_index + infl = "" + if variants: + iforms_list = [ + self.GROUP_XHTML_WORD_IFORM_TEMPLATE.format( + inflword=variant, + exact_str=' exact="yes"' if self._exact else "", + ) + for variant in variants + ] + infl = "\n" + self.GROUP_XHTML_WORD_INFL_TEMPLATE.format( + iforms_str="\n".join(iforms_list), + ) + + headword = self.escape_if_needed(word) + + defi = self.escape_if_needed(defi) + + if hide_word_index: + headword_visible = "" + value_headword = f' value="{headword}"' + else: + headword_visible = "\n" + self._glos.wordTitleStr(headword) + value_headword = "" + + return self.GROUP_XHTML_WORD_DEFINITION_TEMPLATE.format( + spellcheck_str=' spell="yes"' if self._spellcheck else "", + headword_visible=headword_visible, + value_headword=value_headword, + definition=defi, + infl=infl, + ) + + @staticmethod + def getLangCode(lang: Lang | None) -> str: + return lang.code if isinstance(lang, Lang) else "" + + def get_opf_contents( + self, + manifest_contents: str, + spine_contents: str, + ) -> bytes: + cover = "" + if self.cover: + cover = self.COVER_TEMPLATE.format(cover=self.cover) + creationDate = datetime.now().strftime("%Y-%m-%d") + + return self.OPF_TEMPLATE.format( + identifier=self._glos.getInfo("uuid"), + # use Language code instead name for kindlegen + sourceLang=self.getLangCode(self._glos.sourceLang), + targetLang=self.getLangCode(self._glos.targetLang), + title=self._glos.getInfo("name"), + creator=self._glos.author, + copyright=self._glos.getInfo("copyright"), + description=self._glos.getInfo("description"), + creationDate=creationDate, + cover=cover, + manifest=manifest_contents, + spine=spine_contents, + ).encode("utf-8") + + def write_groups(self) -> Generator[None, EntryType, None]: + def add_group(state: GroupStateBySize) -> None: + if state.group_size <= 0: + return + state.group_index += 1 + index = state.group_index + self.GROUP_START_INDEX + group_xhtml_path = self.get_group_xhtml_file_name_from_index(index) + self.add_file_manifest( + "OEBPS/" + group_xhtml_path, + group_xhtml_path, + self.GROUP_XHTML_TEMPLATE.format( + group_contents=self.GROUP_XHTML_WORD_DEFINITION_JOINER.join( + state.group_contents, + ), + ).encode("utf-8"), + "application/xhtml+xml", + ) + + state = GroupStateBySize(self) + while True: + entry = yield + if entry is None: + break + if entry.isData(): + continue + + if state.group_size >= self._file_size_approx: + add_group(state) + state.reset() + + state.add(entry) + + add_group(state) + + def write(self) -> Generator[None, EntryType, None]: + import shutil + import subprocess + + filename = self._filename + 
kindlegen_path = self._kindlegen_path + + yield from EbookWriter.write(self) + + # download kindlegen from this page: + # https://www.amazon.com/gp/feature.html?ie=UTF8&docId=1000765211 + + # run kindlegen + if not kindlegen_path: + kindlegen_path = shutil.which("kindlegen") or "" + if not kindlegen_path: + log.warning( + f"Not running kindlegen, the raw files are located in {filename}", + ) + log.warning( + "Provide KindleGen path with: --write-options 'kindlegen_path=...'", + ) + return + + # name = self._glos.getInfo("name") + log.info(f"Creating .mobi file with kindlegen, using {kindlegen_path!r}") + direc, filename = split(filename) + cmd = [ + kindlegen_path, + join(filename, "OEBPS", "content.opf"), + "-gen_ff_mobi7", + "-o", + "content.mobi", + ] + proc = subprocess.Popen( + cmd, + cwd=direc, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + output = proc.communicate() + log.info(output[0].decode("utf-8")) + mobi_path_abs = os.path.join(filename, "OEBPS", "content.mobi") + log.info(f"Created .mobi file with kindlegen: {mobi_path_abs}") diff --git a/pyglossary/plugins/edict2/__init__.py b/pyglossary/plugins/edict2/__init__.py index f0cb45408..50b9a2466 100644 --- a/pyglossary/plugins/edict2/__init__.py +++ b/pyglossary/plugins/edict2/__init__.py @@ -1,23 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -from pyglossary.core import log -from pyglossary.io_utils import nullTextIO from pyglossary.option import ( BoolOption, EncodingOption, Option, ) -from . import conv - -if TYPE_CHECKING: - import io - from collections.abc import Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType - +from .reader import Reader __all__ = [ "Reader", @@ -71,78 +60,3 @@ comment="Set to false to disable tones coloring", ), } - - -class Reader: - depends = { - "lxml": "lxml", - } - - _encoding: str = "utf-8" - _traditional_title: bool = False - _colorize_tones: bool = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self.file: io.TextIOBase = nullTextIO - self._fileSize = 0 - - def open(self, filename: str) -> None: - # self._glos.sourceLangName = "Chinese" - # self._glos.targetLangName = "English" - - cfile = self.file = open(filename, encoding=self._encoding) - - if cfile.seekable(): - cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - # self._glos.setInfo("input_file_size", f"{self._fileSize}") - else: - log.warning("EDICT2 Reader: file is not seekable") - - def close(self) -> None: - self.file.close() - self.file = nullTextIO - - def __len__(self) -> int: - return 0 - - def __iter__(self) -> Iterator[EntryType]: - file = self.file - fileSize = self._fileSize - glos = self._glos - - render_syllables = ( - conv.render_syllables_color - if self._colorize_tones - else conv.render_syllables_no_color - ) - parse_line = ( - conv.parse_line_trad if self._traditional_title else conv.parse_line_simp - ) - - while True: - line = file.readline() - if not line: - break - line = line.rstrip("\n") - if not line: - continue - if line.startswith("#"): - continue - parts = parse_line(line) - if parts is None: - log.warning(f"bad line: {line!r}") - continue - names, article_text = conv.render_article( - render_syllables, - conv.Article(*parts), - ) - entry = glos.newEntry( - names, - article_text, - defiFormat="h", - byteProgress=(file.tell(), fileSize) if fileSize else None, - ) - yield entry diff --git a/pyglossary/plugins/edict2/reader.py b/pyglossary/plugins/edict2/reader.py new file 
mode 100644 index 000000000..378cc0251 --- /dev/null +++ b/pyglossary/plugins/edict2/reader.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyglossary.core import log +from pyglossary.io_utils import nullTextIO + +from . import conv + +if TYPE_CHECKING: + import io + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + depends = { + "lxml": "lxml", + } + + _encoding: str = "utf-8" + _traditional_title: bool = False + _colorize_tones: bool = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self.file: io.TextIOBase = nullTextIO + self._fileSize = 0 + + def open(self, filename: str) -> None: + # self._glos.sourceLangName = "Chinese" + # self._glos.targetLangName = "English" + + cfile = self.file = open(filename, encoding=self._encoding) + + if cfile.seekable(): + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + # self._glos.setInfo("input_file_size", f"{self._fileSize}") + else: + log.warning("EDICT2 Reader: file is not seekable") + + def close(self) -> None: + self.file.close() + self.file = nullTextIO + + def __len__(self) -> int: + return 0 + + def __iter__(self) -> Iterator[EntryType]: + file = self.file + fileSize = self._fileSize + glos = self._glos + + render_syllables = ( + conv.render_syllables_color + if self._colorize_tones + else conv.render_syllables_no_color + ) + parse_line = ( + conv.parse_line_trad if self._traditional_title else conv.parse_line_simp + ) + + while True: + line = file.readline() + if not line: + break + line = line.rstrip("\n") + if not line: + continue + if line.startswith("#"): + continue + parts = parse_line(line) + if parts is None: + log.warning(f"bad line: {line!r}") + continue + names, article_text = conv.render_article( + render_syllables, + conv.Article(*parts), + ) + entry = glos.newEntry( + names, + article_text, + defiFormat="h", + byteProgress=(file.tell(), fileSize) if fileSize else None, + ) + yield entry diff --git a/pyglossary/plugins/edlin/__init__.py b/pyglossary/plugins/edlin/__init__.py index fc5e428f8..6f6664762 100644 --- a/pyglossary/plugins/edlin/__init__.py +++ b/pyglossary/plugins/edlin/__init__.py @@ -1,45 +1,15 @@ # -*- coding: utf-8 -*- -# edlin.py -# -# Copyright © 2016-2019 Saeed Rasooli (ilius) -# This file is part of PyGlossary project, https://github.com/ilius/pyglossary -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. Or on Debian systems, from /usr/share/common-licenses/GPL -# If not, see . 
from __future__ import annotations -import os -from os.path import dirname, isdir, isfile, join -from typing import TYPE_CHECKING - -from pyglossary.core import log from pyglossary.option import ( BoolOption, EncodingOption, Option, ) -from pyglossary.text_utils import ( - escapeNTB, - splitByBarUnescapeNTB, - unescapeNTB, -) - -if TYPE_CHECKING: - from collections.abc import Generator, Iterator - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -72,241 +42,3 @@ "encoding": EncodingOption(), "prev_link": BoolOption(comment="Enable link to previous entry"), } - - -def makeDir(direc: str) -> None: - if not isdir(direc): - os.makedirs(direc) - - -class Reader: - _encoding: str = "utf-8" - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def close(self) -> None: - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._prev_link = True - self._wordCount = None - self._rootPath = None - self._resDir = "" - self._resFileNames: list[str] = [] - - def open(self, filename: str) -> None: - from pyglossary.json_utils import jsonToData - - if isdir(filename): - infoFname = join(filename, "info.json") - elif isfile(filename): - infoFname = filename - filename = dirname(filename) - else: - raise ValueError( - f"error while opening {filename!r}: no such file or directory", - ) - self._filename = filename - - with open(infoFname, encoding=self._encoding) as infoFp: - info = jsonToData(infoFp.read()) - self._wordCount = info.pop("wordCount") - self._prev_link = info.pop("prev_link") - self._rootPath = info.pop("root") - for key, value in info.items(): - self._glos.setInfo(key, value) - - self._resDir = join(filename, "res") - if isdir(self._resDir): - self._resFileNames = os.listdir(self._resDir) - else: - self._resDir = "" - self._resFileNames = [] - - def __len__(self) -> int: - if self._wordCount is None: - log.error("called len() on a reader which is not open") - return 0 - return self._wordCount + len(self._resFileNames) - - def __iter__(self) -> Iterator[EntryType]: - if not self._rootPath: - raise RuntimeError("iterating over a reader while it's not open") - - wordCount = 0 - nextPath = self._rootPath - while nextPath != "END": - wordCount += 1 - # before or after reading word and defi - # (and skipping empty entry)? 
FIXME - - with open( - join(self._filename, nextPath), - encoding=self._encoding, - ) as _file: - header = _file.readline().rstrip() - if self._prev_link: - _prevPath, nextPath = header.split(" ") - else: - nextPath = header - word = _file.readline() - if not word: - yield None # update progressbar - continue - defi = _file.read() - if not defi: - log.warning( - f"Edlin Reader: no definition for word {word!r}, skipping", - ) - yield None # update progressbar - continue - word = word.rstrip() - defi = defi.rstrip() - - if self._glos.alts: - word = splitByBarUnescapeNTB(word) - if len(word) == 1: - word = word[0] - else: - word = unescapeNTB(word, bar=False) - - # defi = unescapeNTB(defi) - yield self._glos.newEntry(word, defi) - - if wordCount != self._wordCount: - log.warning( - f"{wordCount} words found, " - f"wordCount in info.json was {self._wordCount}", - ) - self._wordCount = wordCount - - resDir = self._resDir - for fname in self._resFileNames: - with open(join(resDir, fname), "rb") as _file: - yield self._glos.newDataEntry( - fname, - _file.read(), - ) - - -class Writer: - _encoding: str = "utf-8" - _prev_link: bool = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def finish(self) -> None: - self._clear() - - def open(self, filename: str) -> None: - self._filename = filename - self._resDir = join(filename, "res") - os.makedirs(filename) - os.mkdir(self._resDir) - - def _clear(self) -> None: - self._filename = "" - self._resDir = "" - self._encoding = "utf-8" - self._hashSet: set[str] = set() - # self._wordCount = None - - @staticmethod - def hashToPath(h: str) -> str: - return h[:2] + "/" + h[2:] - - def getEntryHash(self, entry: EntryType) -> str: - """ - Return hash string for given entry - don't call it twice for one entry, if you do you will get a - different hash string. 
- """ - from hashlib import sha1 - - hash_ = sha1(entry.s_word.encode("utf-8")).hexdigest()[:8] # noqa: S324 - if hash_ not in self._hashSet: - self._hashSet.add(hash_) - return hash_ - index = 0 - while True: - tmp_hash = hash_ + f"{index:x}" - if tmp_hash not in self._hashSet: - self._hashSet.add(tmp_hash) - return tmp_hash - index += 1 - - def saveEntry( - self, - thisEntry: EntryType, - thisHash: str, - prevHash: str | None, - nextHash: str | None, - ) -> None: - dpath = join(self._filename, thisHash[:2]) - makeDir(dpath) - with open( - join(dpath, thisHash[2:]), - "w", - encoding=self._encoding, - ) as toFile: - nextPath = self.hashToPath(nextHash) if nextHash else "END" - if self._prev_link: - prevPath = self.hashToPath(prevHash) if prevHash else "START" - header = prevPath + " " + nextPath - else: - header = nextPath - toFile.write( - "\n".join( - [ - header, - escapeNTB(thisEntry.s_word, bar=False), - thisEntry.defi, - ], - ), - ) - - def write(self) -> Generator[None, EntryType, None]: - from pyglossary.json_utils import dataToPrettyJson - - thisEntry = yield - if thisEntry is None: - raise ValueError("glossary is empty") - - count = 1 - rootHash = thisHash = self.getEntryHash(thisEntry) - prevHash = None - - while True: - nextEntry = yield - if nextEntry is None: - break - if nextEntry.isData(): - nextEntry.save(self._resDir) - continue - nextHash = self.getEntryHash(nextEntry) - self.saveEntry(thisEntry, thisHash, prevHash, nextHash) - thisEntry = nextEntry - prevHash, thisHash = thisHash, nextHash - count += 1 - self.saveEntry(thisEntry, thisHash, prevHash, None) - - with open( - join(self._filename, "info.json"), - "w", - encoding=self._encoding, - ) as toFile: - info = {} - info["name"] = self._glos.getInfo("name") - info["root"] = self.hashToPath(rootHash) - info["prev_link"] = self._prev_link - info["wordCount"] = count - # info["modified"] = - - info |= self._glos.getExtraInfos(["name", "root", "prev_link", "wordCount"]) - - toFile.write(dataToPrettyJson(info)) diff --git a/pyglossary/plugins/edlin/reader.py b/pyglossary/plugins/edlin/reader.py new file mode 100644 index 000000000..8fcdf4007 --- /dev/null +++ b/pyglossary/plugins/edlin/reader.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +import os +from os.path import dirname, isdir, isfile, join +from typing import TYPE_CHECKING + +from pyglossary.core import log +from pyglossary.text_utils import ( + splitByBarUnescapeNTB, + unescapeNTB, +) + +if TYPE_CHECKING: + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + _encoding: str = "utf-8" + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def close(self) -> None: + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._prev_link = True + self._wordCount = None + self._rootPath = None + self._resDir = "" + self._resFileNames: list[str] = [] + + def open(self, filename: str) -> None: + from pyglossary.json_utils import jsonToData + + if isdir(filename): + infoFname = join(filename, "info.json") + elif isfile(filename): + infoFname = filename + filename = dirname(filename) + else: + raise ValueError( + f"error while opening {filename!r}: no such file or directory", + ) + self._filename = filename + + with open(infoFname, encoding=self._encoding) as infoFp: + info = jsonToData(infoFp.read()) + self._wordCount = info.pop("wordCount") + self._prev_link = info.pop("prev_link") + self._rootPath = 
info.pop("root") + for key, value in info.items(): + self._glos.setInfo(key, value) + + self._resDir = join(filename, "res") + if isdir(self._resDir): + self._resFileNames = os.listdir(self._resDir) + else: + self._resDir = "" + self._resFileNames = [] + + def __len__(self) -> int: + if self._wordCount is None: + log.error("called len() on a reader which is not open") + return 0 + return self._wordCount + len(self._resFileNames) + + def __iter__(self) -> Iterator[EntryType]: + if not self._rootPath: + raise RuntimeError("iterating over a reader while it's not open") + + wordCount = 0 + nextPath = self._rootPath + while nextPath != "END": + wordCount += 1 + # before or after reading word and defi + # (and skipping empty entry)? FIXME + + with open( + join(self._filename, nextPath), + encoding=self._encoding, + ) as _file: + header = _file.readline().rstrip() + if self._prev_link: + _prevPath, nextPath = header.split(" ") + else: + nextPath = header + word = _file.readline() + if not word: + yield None # update progressbar + continue + defi = _file.read() + if not defi: + log.warning( + f"Edlin Reader: no definition for word {word!r}, skipping", + ) + yield None # update progressbar + continue + word = word.rstrip() + defi = defi.rstrip() + + if self._glos.alts: + word = splitByBarUnescapeNTB(word) + if len(word) == 1: + word = word[0] + else: + word = unescapeNTB(word, bar=False) + + # defi = unescapeNTB(defi) + yield self._glos.newEntry(word, defi) + + if wordCount != self._wordCount: + log.warning( + f"{wordCount} words found, " + f"wordCount in info.json was {self._wordCount}", + ) + self._wordCount = wordCount + + resDir = self._resDir + for fname in self._resFileNames: + with open(join(resDir, fname), "rb") as _file: + yield self._glos.newDataEntry( + fname, + _file.read(), + ) diff --git a/pyglossary/plugins/edlin/writer.py b/pyglossary/plugins/edlin/writer.py new file mode 100644 index 000000000..10b77b85a --- /dev/null +++ b/pyglossary/plugins/edlin/writer.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +import os +from os.path import isdir, join +from typing import TYPE_CHECKING + +from pyglossary.text_utils import ( + escapeNTB, +) + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +def makeDir(direc: str) -> None: + if not isdir(direc): + os.makedirs(direc) + + +class Writer: + _encoding: str = "utf-8" + _prev_link: bool = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def finish(self) -> None: + self._clear() + + def open(self, filename: str) -> None: + self._filename = filename + self._resDir = join(filename, "res") + os.makedirs(filename) + os.mkdir(self._resDir) + + def _clear(self) -> None: + self._filename = "" + self._resDir = "" + self._encoding = "utf-8" + self._hashSet: set[str] = set() + # self._wordCount = None + + @staticmethod + def hashToPath(h: str) -> str: + return h[:2] + "/" + h[2:] + + def getEntryHash(self, entry: EntryType) -> str: + """ + Return hash string for given entry + don't call it twice for one entry, if you do you will get a + different hash string. 
+ """ + from hashlib import sha1 + + hash_ = sha1(entry.s_word.encode("utf-8")).hexdigest()[:8] # noqa: S324 + if hash_ not in self._hashSet: + self._hashSet.add(hash_) + return hash_ + index = 0 + while True: + tmp_hash = hash_ + f"{index:x}" + if tmp_hash not in self._hashSet: + self._hashSet.add(tmp_hash) + return tmp_hash + index += 1 + + def saveEntry( + self, + thisEntry: EntryType, + thisHash: str, + prevHash: str | None, + nextHash: str | None, + ) -> None: + dpath = join(self._filename, thisHash[:2]) + makeDir(dpath) + with open( + join(dpath, thisHash[2:]), + "w", + encoding=self._encoding, + ) as toFile: + nextPath = self.hashToPath(nextHash) if nextHash else "END" + if self._prev_link: + prevPath = self.hashToPath(prevHash) if prevHash else "START" + header = prevPath + " " + nextPath + else: + header = nextPath + toFile.write( + "\n".join( + [ + header, + escapeNTB(thisEntry.s_word, bar=False), + thisEntry.defi, + ], + ), + ) + + def write(self) -> Generator[None, EntryType, None]: + from pyglossary.json_utils import dataToPrettyJson + + thisEntry = yield + if thisEntry is None: + raise ValueError("glossary is empty") + + count = 1 + rootHash = thisHash = self.getEntryHash(thisEntry) + prevHash = None + + while True: + nextEntry = yield + if nextEntry is None: + break + if nextEntry.isData(): + nextEntry.save(self._resDir) + continue + nextHash = self.getEntryHash(nextEntry) + self.saveEntry(thisEntry, thisHash, prevHash, nextHash) + thisEntry = nextEntry + prevHash, thisHash = thisHash, nextHash + count += 1 + self.saveEntry(thisEntry, thisHash, prevHash, None) + + with open( + join(self._filename, "info.json"), + "w", + encoding=self._encoding, + ) as toFile: + info = {} + info["name"] = self._glos.getInfo("name") + info["root"] = self.hashToPath(rootHash) + info["prev_link"] = self._prev_link + info["wordCount"] = count + # info["modified"] = + + info |= self._glos.getExtraInfos(["name", "root", "prev_link", "wordCount"]) + + toFile.write(dataToPrettyJson(info)) diff --git a/pyglossary/plugins/gettext_po/__init__.py b/pyglossary/plugins/gettext_po/__init__.py index 978b7c455..cd6dd9887 100644 --- a/pyglossary/plugins/gettext_po/__init__.py +++ b/pyglossary/plugins/gettext_po/__init__.py @@ -2,23 +2,13 @@ from __future__ import annotations -import os -from os.path import isdir -from typing import TYPE_CHECKING - -from pyglossary.core import exc_note, log, pip -from pyglossary.io_utils import nullTextIO from pyglossary.option import ( BoolOption, Option, ) -from pyglossary.text_utils import splitByBar - -if TYPE_CHECKING: - import io - from collections.abc import Generator, Iterator - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -52,166 +42,3 @@ optionsProp: dict[str, Option] = { "resources": BoolOption(comment="Enable resources / data files"), } - - -class Reader: - depends = { - "polib": "polib", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._alts = glos.alts - self.clear() - - def clear(self) -> None: - self._filename = "" - self._file: io.TextIOBase = nullTextIO - self._wordCount: int | None = None - self._resDir = "" - self._resFileNames: list[str] = [] - - def open(self, filename: str) -> None: - self._filename = filename - self._file = open(filename, encoding="utf-8") - self._resDir = filename + "_res" - if isdir(self._resDir): - self._resFileNames = os.listdir(self._resDir) - else: - self._resDir = "" - self._resFileNames = [] - - def 
close(self) -> None: - self._file.close() - self._file = nullTextIO - self.clear() - - def __len__(self) -> int: - from pyglossary.file_utils import fileCountLines - - if self._wordCount is None: - log.debug("Try not to use len(reader) as it takes extra time") - self._wordCount = fileCountLines( - self._filename, - newline=b"\nmsgid", - ) - return self._wordCount - - def makeEntry(self, word: str, defi: str) -> EntryType: - if self._alts: - return self._glos.newEntry(splitByBar(word), defi) - return self._glos.newEntry(word, defi) - - def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912 - try: - from polib import unescape as po_unescape - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install polib` to install") - raise - - file = self._file - - word = "" - defi = "" - msgstr = False - wordCount = 0 - for line_ in file: - line = line_.strip() # noqa: PLW2901 - if not line: - continue - if line.startswith("#"): - continue - if line.startswith("msgid "): - if word: - yield self.makeEntry(word, defi) - wordCount += 1 - word = "" - defi = "" - else: - pass - # TODO: parse defi and set glos info? - # but this should be done in self.open - word = po_unescape(line[6:]) - if word.startswith('"'): - if len(word) < 2 or word[-1] != '"': - raise ValueError("invalid po line: line") - word = word[1:-1] - msgstr = False - continue - if line.startswith("msgstr "): - if msgstr: - log.error("msgid omitted!") - defi = po_unescape(line[7:]) - if defi.startswith('"'): - if len(defi) < 2 or defi[-1] != '"': - raise ValueError("invalid po line: line") - defi = defi[1:-1] - msgstr = True - continue - - line = po_unescape(line) - if line.startswith('"'): - if len(line) < 2 or line[-1] != '"': - raise ValueError("invalid po line: line") - line = line[1:-1] - - if msgstr: - defi += line - else: - word += line - if word: - yield self.makeEntry(word, defi) - wordCount += 1 - self._wordCount = wordCount - - -class Writer: - depends = { - "polib": "polib", - } - - _resources: bool = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: io.TextIOBase = nullTextIO - glos.preventDuplicateWords() - - def open(self, filename: str) -> None: - try: - from polib import escape as po_escape - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install polib` to install") - raise - - self._filename = filename - self._file = file = open(filename, mode="w", encoding="utf-8") - file.write('#\nmsgid ""\nmsgstr ""\n') - for key, value in self._glos.iterInfo(): - file.write(f'"{po_escape(key)}: {po_escape(value)}\\n"\n') - - def finish(self) -> None: - self._filename = "" - self._file.close() - self._file = nullTextIO - - def write(self) -> Generator[None, EntryType, None]: - from polib import escape as po_escape - - file = self._file - - resources = self._resources - filename = self._filename - while True: - entry = yield - if entry is None: - break - if entry.isData(): - if resources: - entry.save(filename + "_res") - continue - file.write( - f'msgid "{po_escape(entry.s_word)}"\n' - f'msgstr "{po_escape(entry.defi)}"\n\n', - ) diff --git a/pyglossary/plugins/gettext_po/reader.py b/pyglossary/plugins/gettext_po/reader.py new file mode 100644 index 000000000..126288488 --- /dev/null +++ b/pyglossary/plugins/gettext_po/reader.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +import os +from os.path import isdir +from typing import TYPE_CHECKING + +from pyglossary.core import exc_note, log, pip +from 
pyglossary.io_utils import nullTextIO +from pyglossary.text_utils import splitByBar + +if TYPE_CHECKING: + import io + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + depends = { + "polib": "polib", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._alts = glos.alts + self.clear() + + def clear(self) -> None: + self._filename = "" + self._file: io.TextIOBase = nullTextIO + self._wordCount: int | None = None + self._resDir = "" + self._resFileNames: list[str] = [] + + def open(self, filename: str) -> None: + self._filename = filename + self._file = open(filename, encoding="utf-8") + self._resDir = filename + "_res" + if isdir(self._resDir): + self._resFileNames = os.listdir(self._resDir) + else: + self._resDir = "" + self._resFileNames = [] + + def close(self) -> None: + self._file.close() + self._file = nullTextIO + self.clear() + + def __len__(self) -> int: + from pyglossary.file_utils import fileCountLines + + if self._wordCount is None: + log.debug("Try not to use len(reader) as it takes extra time") + self._wordCount = fileCountLines( + self._filename, + newline=b"\nmsgid", + ) + return self._wordCount + + def makeEntry(self, word: str, defi: str) -> EntryType: + if self._alts: + return self._glos.newEntry(splitByBar(word), defi) + return self._glos.newEntry(word, defi) + + def __iter__(self) -> Iterator[EntryType]: # noqa: PLR0912 + try: + from polib import unescape as po_unescape + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install polib` to install") + raise + + file = self._file + + word = "" + defi = "" + msgstr = False + wordCount = 0 + for line_ in file: + line = line_.strip() # noqa: PLW2901 + if not line: + continue + if line.startswith("#"): + continue + if line.startswith("msgid "): + if word: + yield self.makeEntry(word, defi) + wordCount += 1 + word = "" + defi = "" + else: + pass + # TODO: parse defi and set glos info? 
+ # but this should be done in self.open + word = po_unescape(line[6:]) + if word.startswith('"'): + if len(word) < 2 or word[-1] != '"': + raise ValueError("invalid po line: line") + word = word[1:-1] + msgstr = False + continue + if line.startswith("msgstr "): + if msgstr: + log.error("msgid omitted!") + defi = po_unescape(line[7:]) + if defi.startswith('"'): + if len(defi) < 2 or defi[-1] != '"': + raise ValueError("invalid po line: line") + defi = defi[1:-1] + msgstr = True + continue + + line = po_unescape(line) + if line.startswith('"'): + if len(line) < 2 or line[-1] != '"': + raise ValueError("invalid po line: line") + line = line[1:-1] + + if msgstr: + defi += line + else: + word += line + if word: + yield self.makeEntry(word, defi) + wordCount += 1 + self._wordCount = wordCount diff --git a/pyglossary/plugins/gettext_po/writer.py b/pyglossary/plugins/gettext_po/writer.py new file mode 100644 index 000000000..685a447ee --- /dev/null +++ b/pyglossary/plugins/gettext_po/writer.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyglossary.core import exc_note, pip +from pyglossary.io_utils import nullTextIO + +if TYPE_CHECKING: + import io + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + depends = { + "polib": "polib", + } + + _resources: bool = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: io.TextIOBase = nullTextIO + glos.preventDuplicateWords() + + def open(self, filename: str) -> None: + try: + from polib import escape as po_escape + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install polib` to install") + raise + + self._filename = filename + self._file = file = open(filename, mode="w", encoding="utf-8") + file.write('#\nmsgid ""\nmsgstr ""\n') + for key, value in self._glos.iterInfo(): + file.write(f'"{po_escape(key)}: {po_escape(value)}\\n"\n') + + def finish(self) -> None: + self._filename = "" + self._file.close() + self._file = nullTextIO + + def write(self) -> Generator[None, EntryType, None]: + from polib import escape as po_escape + + file = self._file + + resources = self._resources + filename = self._filename + while True: + entry = yield + if entry is None: + break + if entry.isData(): + if resources: + entry.save(filename + "_res") + continue + file.write( + f'msgid "{po_escape(entry.s_word)}"\n' + f'msgstr "{po_escape(entry.defi)}"\n\n', + ) diff --git a/pyglossary/plugins/html_dir/__init__.py b/pyglossary/plugins/html_dir/__init__.py index 8931a0697..d47850759 100644 --- a/pyglossary/plugins/html_dir/__init__.py +++ b/pyglossary/plugins/html_dir/__init__.py @@ -1,24 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import html -import os -import re -import time -from functools import lru_cache -from os.path import isdir, isfile, join -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - import io - from collections.abc import Generator - - from pyglossary.glossary_types import ( - EntryType, - GlossaryType, - ) - -from pyglossary.core import log from pyglossary.option import ( BoolOption, EncodingOption, @@ -26,10 +8,8 @@ Option, StrOption, ) -from pyglossary.text_utils import ( - escapeNTB, - unescapeNTB, -) + +from .writer import Writer __all__ = [ "Writer", @@ -80,469 +60,3 @@ comment="Add headwords title to beginning of definition", ), } - -nbsp = "\xa0" -# nbsp = " " - -darkStyle = """ -body {{ - 
background-color: #373737; - color: #eee; -}} -a {{ color: #aaaaff; }} -a.broken {{ color: #e0c0c0; }} -a.no_ul {{ text-decoration: none; }} -b.headword {{ font-size: 1.5em; color: #c7ffb9; }} -h1 {{ font-size: 1.5em; color: #c7ffb9;}} -h2 {{ font-size: 1.3em;}} -h3 {{ font-size: 1.0em;}} -h4 {{ font-size: 1.0em;}} -h5 {{ font-size: 1.0em;}} -h6 {{ font-size: 1.0em;}} -""" - - -class Writer: - _encoding: str = "utf-8" - _resources: bool = True - _max_file_size: int = 102400 - _filename_format: str = "{n:05d}.html" - _escape_defi: bool = False - _dark: bool = True - _css: str = "" - _word_title: bool = True - - @staticmethod - def stripFullHtmlError(entry: EntryType, error: str) -> None: - log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._fileObj: io.IOBase | None = None - self._encoding = "utf-8" - self._filename_format = "{n:05d}.html" - self._tail = "" - self._filenameList: list[str] = [] - glos.stripFullHtml(errorHandler=self.stripFullHtmlError) - - self._resSrcPattern = re.compile(' src="([^"]*)"') - - def open(self, filename: str) -> None: - self._filename = filename - self._resDir = resDir = join(filename, "res") - if not isdir(filename): - os.mkdir(filename) - if not isdir(resDir): - os.mkdir(resDir) - if self._css: - self.copyCSS(self._css) - - def copyCSS(self, cssPath: str) -> None: - import shutil - - shutil.copy(cssPath, join(self._filename, "style.css")) - - def finish(self) -> None: - pass - - def getNextFilename(self) -> str: - return self._filename_format.format( - n=len(self._filenameList), - ) - - def nextFile(self) -> io.TextIOBase: - if self._fileObj: - self._fileObj.write(self._tail) - self._fileObj.close() - filename = self.getNextFilename() - self._filenameList.append(filename) - self._fileObj = open( - join( - self._filename, - filename, - ), - mode="w", - encoding=self._encoding, - ) - return self._fileObj - - def fixLinks(self, linkTargetSet: set[str]) -> None: # noqa: PLR0912 - import gc - - gc.collect() - dirn = self._filename - - filenameList = self._filenameList - - fileByWord: dict[str, list[tuple[str, int]]] = {} - for line in open(join(dirn, "index.txt"), encoding="utf-8"): - line = line.rstrip("\n") # noqa: PLW2901 - if not line: - continue - entryIndexStr, wordEsc, filename, _ = line.split("\t") - entryIndex = int(entryIndexStr) - # entryId = f"entry{entryIndex}" - word = unescapeNTB(wordEsc) - if word not in linkTargetSet: - continue - if word in fileByWord: - fileByWord[word].append((filename, entryIndex)) - else: - fileByWord[word] = [(filename, entryIndex)] - - # with open(join(dirn, "fileByWord.json"), "w") as fileByWordFile: - # json.dump(fileByWord, fileByWordFile, ensure_ascii=False, indent="\t") - - @lru_cache(maxsize=10) - def getLinksByFile(fileIndex: int) -> io.TextIOBase: - return open( - join(dirn, f"links{fileIndex}"), - mode="a", - encoding="utf-8", - ) - - log.info("") - for line in open(join(dirn, "links.txt"), encoding="utf-8"): - line = line.rstrip("\n") # noqa: PLW2901 - if not line: - continue - target, fileIndexStr, x_start, x_size = line.split("\t") - target = unescapeNTB(target) - if target not in fileByWord: - targetNew = "" - else: - targetFilename, targetEntryIndex = fileByWord[target][0] - if targetFilename == filename: - continue - targetNew = f"{targetFilename}#entry{targetEntryIndex}" - file = getLinksByFile(int(fileIndexStr)) - file.write( - f"{x_start}\t{x_size}\t{targetNew}\n", - ) - file.flush() - - 
linkTargetSet.clear() - del fileByWord, linkTargetSet - gc.collect() - - if os.sep == "\\": - time.sleep(0.1) - - entry_url_fmt = self._glos.getInfo("entry_url") - - re_href = re.compile( - b' href="[^<>"]*?"', - re.IGNORECASE, - ) - - for fileIndex, filename in enumerate(filenameList): - if not isfile(join(dirn, f"links{fileIndex}")): - continue - with open(join(dirn, filename), mode="rb") as inFile: - with open(join(dirn, f"{filename}.new"), mode="wb") as outFile: - for linkLine in open(join(dirn, f"links{fileIndex}"), "rb"): - outFile.flush() - ( - b_x_start, - b_x_size, - b_target, - ) = linkLine.rstrip(b"\n").split(b"\t") - outFile.write( - inFile.read( - int(b_x_start, 16) - inFile.tell(), - ), - ) - curLink = inFile.read(int(b_x_size, 16)) - - if b_target: - outFile.write( - re_href.sub( - b' href="./' + b_target + b'"', - curLink, - ), - ) - continue - - if not entry_url_fmt: - outFile.write( - curLink.replace( - b' href="#', - b' class="broken" href="#', - ), - ) - continue - - st = curLink.decode("utf-8") - i = st.find('href="#') - j = st.find('"', i + 7) - word = st[i + 7 : j] - url = entry_url_fmt.format(word=word) - outFile.write( - ( - st[:i] + f'class="broken" href="{url}"' + st[j + 1 :] - ).encode("utf-8"), - ) - - outFile.write(inFile.read()) - - os.remove(join(dirn, filename)) - os.rename(join(dirn, f"{filename}.new"), join(dirn, filename)) - os.remove(join(dirn, f"links{fileIndex}")) - - def writeInfo(self, filename: str, header: str) -> None: - glos = self._glos - title = glos.getInfo("name") - customStyle = ( - "table, th, td {border: 1px solid black; " - "border-collapse: collapse; padding: 5px;}" - ) - infoHeader = header.format( - pageTitle=f"Info: {title}", - customStyle=customStyle, - ) - with open( - join(filename, "info.html"), - mode="w", - encoding=self._encoding, - ) as _file: - _file.write( - infoHeader + "" - "" - '' - '' - "\n", - ) - for key, value in glos.iterInfo(): - _file.write( - f"\n", - ) - _file.write("
              KeyValue
              {key}{value}
              ") - - @staticmethod - def _subResSrc(m: re.Match) -> str: - url = m.group(1) - if "://" in url: - return m.group(0) - url = "res/" + url - return f' src="{url}"' - - def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912 - encoding = self._encoding - resources = self._resources - max_file_size = self._max_file_size - filename_format = self._filename_format - escape_defi = self._escape_defi - - wordSep = ' | ' - - initFileSizeMax = 100 - - glos = self._glos - - filename = self._filename - self._encoding = encoding - self._filename_format = filename_format - - entry_url_fmt = glos.getInfo("entry_url") - - def getEntryWebLink(entry: EntryType) -> str: - if not entry_url_fmt: - return "" - url = entry_url_fmt.format(word=html.escape(entry.l_word[0])) - return f'{nbsp}
              🌏' - - # from math import log2, ceil - # maxPosHexLen = int(ceil(log2(max_file_size) / 4)) - - indexTxtFileObj = open( - join(filename, "index.txt"), - mode="w", - encoding="utf-8", - ) - linksTxtFileObj = open( - join(filename, "links.txt"), - mode="w", - encoding="utf-8", - ) - - title = glos.getInfo("name") - style = "" - if self._dark: - style = darkStyle - - cssLink = '' if self._css else "" - - header = ( - "\n" - "" - "{pageTitle}" - f'' - f'{cssLink}' - "\n" - ) - - def pageHeader(n: int) -> str: - return header.format( - pageTitle=f"Page {n} of {title}", - customStyle="", - ) - - def navBar() -> str: - links: list[str] = [] - if len(self._filenameList) > 1: - links.append(f'') - links.extend( - [ - f'', - 'ℹ️', # noqa: RUF001 - ], - ) - return ( - '" - ) - - tailSize = len(self._tail.encode(encoding)) - - if max_file_size < len(header) + tailSize: - raise ValueError(f"{max_file_size=} is too small") - - max_file_size -= tailSize - - if not isdir(self._filename): - os.mkdir(self._filename) - - fileObj = self.nextFile() - fileObj.write(pageHeader(0)) - fileObj.write(navBar()) - - re_fixed_link = re.compile( - r']*? )?href="#([^<>"]+?)">[^<>]+?', - re.IGNORECASE, - ) - - linkTargetSet = set() - - def replaceBword(text: str) -> str: - return text.replace( - ' href="bword://', - ' href="#', - ) - - def addLinks(text: str, pos: int) -> None: - for m in re_fixed_link.finditer(text): - if ' class="entry_link"' in m.group(0): - continue - if m.group(0).count("href=") != 1: - log.error(f"unexpected match: {m.group(0)}") - target = html.unescape(m.group(1)) - linkTargetSet.add(target) - start = m.start() - b_start = len(text[:start].encode(encoding)) - b_size = len(text[start : m.end()].encode(encoding)) - linksTxtFileObj.write( - f"{escapeNTB(target)}\t" - f"{len(self._filenameList) - 1}\t" - f"{pos + b_start:x}\t" - f"{b_size:x}\n", - ) - linksTxtFileObj.flush() - - self.writeInfo(filename, header) - - word_title = self._word_title - - resDir = self._resDir - entryIndex = -1 - while True: - entryIndex += 1 - entry = yield - if entry is None: - break - if entry.isData(): - if resources: - entry.save(resDir) - continue - - entry.detectDefiFormat() - defi = entry.defi - defiFormat = entry.defiFormat - - if defi.startswith("") and defiFormat != "h": - log.error(f"bad {defiFormat=}") - defiFormat = "h" - - if defiFormat == "m": - defi = html.escape(defi) - if "\n" in defi: - # could be markdown or unformatted plaintext - # FIXME: this changes the font to a monospace - defi = f"
<pre>{defi}</pre>
              " - elif defiFormat == "h": - defi = self._resSrcPattern.sub(self._subResSrc, defi) - if escape_defi: - defi = html.escape(defi) - - entryId = f"entry{entryIndex}" - - if word_title: - words = [html.escape(word) for word in entry.l_word] - title = glos.wordTitleStr( - wordSep.join(words), - sample=entry.l_word[0], - class_="headword", - ) - - if not title: - title = f"Entry {entryIndex}" - - # entry_link_sym = "¶" - entry_link_sym = "🔗" - text = ( - f'
              {title}{nbsp}{nbsp}' - f'' - f"{entry_link_sym}" - f"{getEntryWebLink(entry)}" - f"
              \n{defi}" - "
              \n" - "
              \n" - ) - pos = fileObj.tell() - if pos > initFileSizeMax and pos > max_file_size - len( - text.encode(encoding), - ): - fileObj = self.nextFile() - fileObj.write( - pageHeader( - len(self._filenameList) - 1, - ), - ) - fileObj.write(navBar()) - pos = fileObj.tell() - tmpFilename = escapeNTB(self._filenameList[-1]) - for word in entry.l_word: - indexTxtFileObj.write( - f"{entryIndex}\t" - f"{escapeNTB(word)}\t" - f"{tmpFilename}\t" - f"{pos}\n", - ) - del tmpFilename - text = replaceBword(text) - addLinks(text, pos) - fileObj.write(text) - - fileObj.close() - self._fileObj = None - indexTxtFileObj.close() - - linksTxtFileObj.close() - - if linkTargetSet: - log.info(f"{len(linkTargetSet)} link targets found") - log.info("Fixing links, please wait...") - self.fixLinks(linkTargetSet) - - os.remove(join(filename, "links.txt")) diff --git a/pyglossary/plugins/html_dir/writer.py b/pyglossary/plugins/html_dir/writer.py new file mode 100644 index 000000000..6451f09ce --- /dev/null +++ b/pyglossary/plugins/html_dir/writer.py @@ -0,0 +1,491 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import html +import os +import re +import time +from functools import lru_cache +from os.path import isdir, isfile, join +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import io + from collections.abc import Generator + + from pyglossary.glossary_types import ( + EntryType, + GlossaryType, + ) + +from pyglossary.core import log +from pyglossary.text_utils import ( + escapeNTB, + unescapeNTB, +) + +nbsp = "\xa0" +# nbsp = " " + +darkStyle = """ +body {{ + background-color: #373737; + color: #eee; +}} +a {{ color: #aaaaff; }} +a.broken {{ color: #e0c0c0; }} +a.no_ul {{ text-decoration: none; }} +b.headword {{ font-size: 1.5em; color: #c7ffb9; }} +h1 {{ font-size: 1.5em; color: #c7ffb9;}} +h2 {{ font-size: 1.3em;}} +h3 {{ font-size: 1.0em;}} +h4 {{ font-size: 1.0em;}} +h5 {{ font-size: 1.0em;}} +h6 {{ font-size: 1.0em;}} +""" + + +class Writer: + _encoding: str = "utf-8" + _resources: bool = True + _max_file_size: int = 102400 + _filename_format: str = "{n:05d}.html" + _escape_defi: bool = False + _dark: bool = True + _css: str = "" + _word_title: bool = True + + @staticmethod + def stripFullHtmlError(entry: EntryType, error: str) -> None: + log.error(f"error in stripFullHtml: {error}, words={entry.l_word!r}") + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._fileObj: io.IOBase | None = None + self._encoding = "utf-8" + self._filename_format = "{n:05d}.html" + self._tail = "" + self._filenameList: list[str] = [] + glos.stripFullHtml(errorHandler=self.stripFullHtmlError) + + self._resSrcPattern = re.compile(' src="([^"]*)"') + + def open(self, filename: str) -> None: + self._filename = filename + self._resDir = resDir = join(filename, "res") + if not isdir(filename): + os.mkdir(filename) + if not isdir(resDir): + os.mkdir(resDir) + if self._css: + self.copyCSS(self._css) + + def copyCSS(self, cssPath: str) -> None: + import shutil + + shutil.copy(cssPath, join(self._filename, "style.css")) + + def finish(self) -> None: + pass + + def getNextFilename(self) -> str: + return self._filename_format.format( + n=len(self._filenameList), + ) + + def nextFile(self) -> io.TextIOBase: + if self._fileObj: + self._fileObj.write(self._tail) + self._fileObj.close() + filename = self.getNextFilename() + self._filenameList.append(filename) + self._fileObj = open( + join( + self._filename, + filename, + ), + mode="w", + encoding=self._encoding, 
+ ) + return self._fileObj + + def fixLinks(self, linkTargetSet: set[str]) -> None: # noqa: PLR0912 + import gc + + gc.collect() + dirn = self._filename + + filenameList = self._filenameList + + fileByWord: dict[str, list[tuple[str, int]]] = {} + for line in open(join(dirn, "index.txt"), encoding="utf-8"): + line = line.rstrip("\n") # noqa: PLW2901 + if not line: + continue + entryIndexStr, wordEsc, filename, _ = line.split("\t") + entryIndex = int(entryIndexStr) + # entryId = f"entry{entryIndex}" + word = unescapeNTB(wordEsc) + if word not in linkTargetSet: + continue + if word in fileByWord: + fileByWord[word].append((filename, entryIndex)) + else: + fileByWord[word] = [(filename, entryIndex)] + + # with open(join(dirn, "fileByWord.json"), "w") as fileByWordFile: + # json.dump(fileByWord, fileByWordFile, ensure_ascii=False, indent="\t") + + @lru_cache(maxsize=10) + def getLinksByFile(fileIndex: int) -> io.TextIOBase: + return open( + join(dirn, f"links{fileIndex}"), + mode="a", + encoding="utf-8", + ) + + log.info("") + for line in open(join(dirn, "links.txt"), encoding="utf-8"): + line = line.rstrip("\n") # noqa: PLW2901 + if not line: + continue + target, fileIndexStr, x_start, x_size = line.split("\t") + target = unescapeNTB(target) + if target not in fileByWord: + targetNew = "" + else: + targetFilename, targetEntryIndex = fileByWord[target][0] + if targetFilename == filename: + continue + targetNew = f"{targetFilename}#entry{targetEntryIndex}" + file = getLinksByFile(int(fileIndexStr)) + file.write( + f"{x_start}\t{x_size}\t{targetNew}\n", + ) + file.flush() + + linkTargetSet.clear() + del fileByWord, linkTargetSet + gc.collect() + + if os.sep == "\\": + time.sleep(0.1) + + entry_url_fmt = self._glos.getInfo("entry_url") + + re_href = re.compile( + b' href="[^<>"]*?"', + re.IGNORECASE, + ) + + for fileIndex, filename in enumerate(filenameList): + if not isfile(join(dirn, f"links{fileIndex}")): + continue + with open(join(dirn, filename), mode="rb") as inFile: + with open(join(dirn, f"{filename}.new"), mode="wb") as outFile: + for linkLine in open(join(dirn, f"links{fileIndex}"), "rb"): + outFile.flush() + ( + b_x_start, + b_x_size, + b_target, + ) = linkLine.rstrip(b"\n").split(b"\t") + outFile.write( + inFile.read( + int(b_x_start, 16) - inFile.tell(), + ), + ) + curLink = inFile.read(int(b_x_size, 16)) + + if b_target: + outFile.write( + re_href.sub( + b' href="./' + b_target + b'"', + curLink, + ), + ) + continue + + if not entry_url_fmt: + outFile.write( + curLink.replace( + b' href="#', + b' class="broken" href="#', + ), + ) + continue + + st = curLink.decode("utf-8") + i = st.find('href="#') + j = st.find('"', i + 7) + word = st[i + 7 : j] + url = entry_url_fmt.format(word=word) + outFile.write( + ( + st[:i] + f'class="broken" href="{url}"' + st[j + 1 :] + ).encode("utf-8"), + ) + + outFile.write(inFile.read()) + + os.remove(join(dirn, filename)) + os.rename(join(dirn, f"{filename}.new"), join(dirn, filename)) + os.remove(join(dirn, f"links{fileIndex}")) + + def writeInfo(self, filename: str, header: str) -> None: + glos = self._glos + title = glos.getInfo("name") + customStyle = ( + "table, th, td {border: 1px solid black; " + "border-collapse: collapse; padding: 5px;}" + ) + infoHeader = header.format( + pageTitle=f"Info: {title}", + customStyle=customStyle, + ) + with open( + join(filename, "info.html"), + mode="w", + encoding=self._encoding, + ) as _file: + _file.write( + infoHeader + "" + "" + '' + '' + "\n", + ) + for key, value in glos.iterInfo(): + _file.write( + 
f"\n", + ) + _file.write("
              KeyValue
              {key}{value}
              ") + + @staticmethod + def _subResSrc(m: re.Match) -> str: + url = m.group(1) + if "://" in url: + return m.group(0) + url = "res/" + url + return f' src="{url}"' + + def write(self) -> Generator[None, EntryType, None]: # noqa: PLR0912 + encoding = self._encoding + resources = self._resources + max_file_size = self._max_file_size + filename_format = self._filename_format + escape_defi = self._escape_defi + + wordSep = ' | ' + + initFileSizeMax = 100 + + glos = self._glos + + filename = self._filename + self._encoding = encoding + self._filename_format = filename_format + + entry_url_fmt = glos.getInfo("entry_url") + + def getEntryWebLink(entry: EntryType) -> str: + if not entry_url_fmt: + return "" + url = entry_url_fmt.format(word=html.escape(entry.l_word[0])) + return f'{nbsp}🌏' + + # from math import log2, ceil + # maxPosHexLen = int(ceil(log2(max_file_size) / 4)) + + indexTxtFileObj = open( + join(filename, "index.txt"), + mode="w", + encoding="utf-8", + ) + linksTxtFileObj = open( + join(filename, "links.txt"), + mode="w", + encoding="utf-8", + ) + + title = glos.getInfo("name") + style = "" + if self._dark: + style = darkStyle + + cssLink = '' if self._css else "" + + header = ( + "\n" + "" + "{pageTitle}" + f'' + f'{cssLink}' + "\n" + ) + + def pageHeader(n: int) -> str: + return header.format( + pageTitle=f"Page {n} of {title}", + customStyle="", + ) + + def navBar() -> str: + links: list[str] = [] + if len(self._filenameList) > 1: + links.append(f'') + links.extend( + [ + f'', + 'ℹ️', # noqa: RUF001 + ], + ) + return ( + '" + ) + + tailSize = len(self._tail.encode(encoding)) + + if max_file_size < len(header) + tailSize: + raise ValueError(f"{max_file_size=} is too small") + + max_file_size -= tailSize + + if not isdir(self._filename): + os.mkdir(self._filename) + + fileObj = self.nextFile() + fileObj.write(pageHeader(0)) + fileObj.write(navBar()) + + re_fixed_link = re.compile( + r']*? )?href="#([^<>"]+?)">[^<>]+?', + re.IGNORECASE, + ) + + linkTargetSet = set() + + def replaceBword(text: str) -> str: + return text.replace( + ' href="bword://', + ' href="#', + ) + + def addLinks(text: str, pos: int) -> None: + for m in re_fixed_link.finditer(text): + if ' class="entry_link"' in m.group(0): + continue + if m.group(0).count("href=") != 1: + log.error(f"unexpected match: {m.group(0)}") + target = html.unescape(m.group(1)) + linkTargetSet.add(target) + start = m.start() + b_start = len(text[:start].encode(encoding)) + b_size = len(text[start : m.end()].encode(encoding)) + linksTxtFileObj.write( + f"{escapeNTB(target)}\t" + f"{len(self._filenameList) - 1}\t" + f"{pos + b_start:x}\t" + f"{b_size:x}\n", + ) + linksTxtFileObj.flush() + + self.writeInfo(filename, header) + + word_title = self._word_title + + resDir = self._resDir + entryIndex = -1 + while True: + entryIndex += 1 + entry = yield + if entry is None: + break + if entry.isData(): + if resources: + entry.save(resDir) + continue + + entry.detectDefiFormat() + defi = entry.defi + defiFormat = entry.defiFormat + + if defi.startswith("") and defiFormat != "h": + log.error(f"bad {defiFormat=}") + defiFormat = "h" + + if defiFormat == "m": + defi = html.escape(defi) + if "\n" in defi: + # could be markdown or unformatted plaintext + # FIXME: this changes the font to a monospace + defi = f"
<pre>{defi}</pre>
              " + elif defiFormat == "h": + defi = self._resSrcPattern.sub(self._subResSrc, defi) + if escape_defi: + defi = html.escape(defi) + + entryId = f"entry{entryIndex}" + + if word_title: + words = [html.escape(word) for word in entry.l_word] + title = glos.wordTitleStr( + wordSep.join(words), + sample=entry.l_word[0], + class_="headword", + ) + + if not title: + title = f"Entry {entryIndex}" + + # entry_link_sym = "¶" + entry_link_sym = "🔗" + text = ( + f'
<div id="{entryId}">{title}{nbsp}{nbsp}' + f'<a class="entry_link" href="#{entryId}">' + f"{entry_link_sym}</a>" + f"{getEntryWebLink(entry)}" + f"<br>\n{defi}" + "</div>\n" + "<hr>
              \n" + ) + pos = fileObj.tell() + if pos > initFileSizeMax and pos > max_file_size - len( + text.encode(encoding), + ): + fileObj = self.nextFile() + fileObj.write( + pageHeader( + len(self._filenameList) - 1, + ), + ) + fileObj.write(navBar()) + pos = fileObj.tell() + tmpFilename = escapeNTB(self._filenameList[-1]) + for word in entry.l_word: + indexTxtFileObj.write( + f"{entryIndex}\t" + f"{escapeNTB(word)}\t" + f"{tmpFilename}\t" + f"{pos}\n", + ) + del tmpFilename + text = replaceBword(text) + addLinks(text, pos) + fileObj.write(text) + + fileObj.close() + self._fileObj = None + indexTxtFileObj.close() + + linksTxtFileObj.close() + + if linkTargetSet: + log.info(f"{len(linkTargetSet)} link targets found") + log.info("Fixing links, please wait...") + self.fixLinks(linkTargetSet) + + os.remove(join(filename, "links.txt")) diff --git a/pyglossary/plugins/info_plugin/__init__.py b/pyglossary/plugins/info_plugin/__init__.py index 8c4852ae0..57f4cc719 100644 --- a/pyglossary/plugins/info_plugin/__init__.py +++ b/pyglossary/plugins/info_plugin/__init__.py @@ -6,13 +6,9 @@ from pyglossary.info_writer import InfoWriter as Writer -if TYPE_CHECKING: - from collections.abc import Iterator +from .reader import Reader - from pyglossary.glossary_types import ( - EntryType, - GlossaryType, - ) +if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ @@ -44,25 +40,3 @@ # key is option/argument name, value is instance of Option optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - - def close(self) -> None: - pass - - def open(self, filename: str) -> None: - from pyglossary.json_utils import jsonToData - - with open(filename, encoding="utf-8") as infoFp: - info = jsonToData(infoFp.read()) - for key, value in info.items(): - self._glos.setInfo(key, value) - - def __len__(self) -> int: - return 0 - - def __iter__(self) -> Iterator[EntryType | None]: - yield None diff --git a/pyglossary/plugins/info_plugin/reader.py b/pyglossary/plugins/info_plugin/reader.py new file mode 100644 index 000000000..f8c212230 --- /dev/null +++ b/pyglossary/plugins/info_plugin/reader.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + + from pyglossary.glossary_types import ( + EntryType, + GlossaryType, + ) + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + + def close(self) -> None: + pass + + def open(self, filename: str) -> None: + from pyglossary.json_utils import jsonToData + + with open(filename, encoding="utf-8") as infoFp: + info = jsonToData(infoFp.read()) + assert isinstance(info, dict) + for key, value in info.items(): + self._glos.setInfo(key, value) + + def __len__(self) -> int: + return 0 + + def __iter__(self) -> Iterator[EntryType | None]: + yield None diff --git a/pyglossary/plugins/jmdict/__init__.py b/pyglossary/plugins/jmdict/__init__.py index e5f88f31c..de0297912 100644 --- a/pyglossary/plugins/jmdict/__init__.py +++ b/pyglossary/plugins/jmdict/__init__.py @@ -1,28 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import os -import re -import unicodedata -from io import BytesIO -from typing import TYPE_CHECKING, cast - -if TYPE_CHECKING: - import io - from collections.abc import Callable, Iterator - - from pyglossary.glossary_types import ( - EntryType, - GlossaryType, - ) - from pyglossary.lxml_types import Element, T_htmlfile 
- -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import exc_note, pip -from pyglossary.io_utils import nullBinaryIO from pyglossary.option import ( BoolOption, IntOption, @@ -30,6 +8,8 @@ StrOption, ) +from .reader import Reader + __all__ = [ "Reader", "description", @@ -69,395 +49,3 @@ comment="Add translitation (romaji) of keywords", ), } - - -class Reader: - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - _example_padding: int = 10 - _example_color: str = "" - # _example_color: str = "#008FE1" - _translitation: bool = False - - tagStyle = ( - "color:white;" - "background:green;" - "padding-left:3px;" - "padding-right:3px;" - "border-radius:0.5ex;" - # 0.5ex ~= 0.3em, but "ex" is recommended - ) - - gikun_key = "gikun (meaning as reading) or jukujikun (special kanji reading)" - re_inf_mapping = { - gikun_key: "gikun/jukujikun", - "out-dated or obsolete kana usage": "obsolete", # outdated/obsolete - "word containing irregular kana usage": "irregular", - } - - @staticmethod - def makeList( - hf: T_htmlfile, - input_objects: list[Element], - processor: Callable, - single_prefix: str = "", - skip_single: bool = True, - ) -> None: - """Wrap elements into
                if more than one element.""" - if not input_objects: - return - - if skip_single and len(input_objects) == 1: - hf.write(single_prefix) - processor(hf, input_objects[0]) - return - - with hf.element("ol"): - for el in input_objects: - with hf.element("li"): - processor(hf, el) - - # TODO: break it down - # PLR0912 Too many branches (23 > 12) - def writeSense( # noqa: PLR0912 - self, - hf: T_htmlfile, - sense: Element, - ) -> None: - from lxml import etree as ET - - def br() -> Element: - return ET.Element("br") - - for elem in sense.findall("pos"): - if not elem.text: - continue - desc = elem.text - if desc == "unclassified": - continue - with hf.element("i"): - hf.write(desc.capitalize()) - hf.write(br()) - - glossList = [elem.text.strip() for elem in sense.findall("gloss") if elem.text] - if glossList: - for i, gloss in enumerate(glossList): - if i > 0: - hf.write(", ") - hf.write(gloss) - hf.write(br()) - - relatedWords: list[str] = [] - for elem in sense.findall("xref"): - if not elem.text: - continue - word = elem.text.strip() - word = self._link_number_postfix.sub("", word) - relatedWords.append(word) - - if relatedWords: - hf.write("Related: ") - for i, word in enumerate(relatedWords): - if i > 0: - with hf.element("big"): - hf.write(" | ") - with hf.element("a", href=f"bword://{word}"): - hf.write(word) - hf.write(br()) - - antonymWords: list[str] = [] - for elem in sense.findall("ant"): - if not elem.text: - continue - word = elem.text.strip() - word = self._link_number_postfix.sub("", word) - antonymWords.append(word) - if antonymWords: - hf.write("Antonym: ") - for i, word in enumerate(antonymWords): - if i > 0: - with hf.element("big"): - hf.write(" | ") - with hf.element( - "a", - href=f"bword://{word}", - attrib={"class": "antonym"}, - ): - hf.write(word) - hf.write(br()) - - for i, elem in enumerate(sense.findall("field")): - if not elem.text: - continue - if i > 0: - hf.write(" ") - desc = elem.text - with hf.element("span", style=self.tagStyle): - hf.write(desc) - hf.write(br()) - - for i, elem in enumerate(sense.findall("misc")): - if not elem.text: - continue - if i > 0: - hf.write(" ") - desc = elem.text - with hf.element("small"): - with hf.element("span", style=self.tagStyle): - hf.write(desc) - hf.write(br()) - - examples = sense.findall("example") - # TODO: move to a method - if examples: # noqa: PLR1702 - with hf.element( - "div", - attrib={ - "class": "example", - "style": f"padding: {self._example_padding}px 0px;", - }, - ): - hf.write("Examples:") - with hf.element("ul"): - for i, elem in enumerate(examples): - if not elem.text: - continue - if i > 0: - hf.write(" ") - # one ex_srce (id?), one ex_text, and two ex_sent tags - textElem = elem.find("ex_text") - if textElem is None: - continue - if not textElem.text: - continue - text = textElem.text - sentList: list[str] = [] - for sentElem in elem.findall("ex_sent"): - if not sentElem.text: - continue - sentList.append(sentElem.text) - with hf.element("li"): - style: dict[str, str] = {} - if self._example_color: - style["color"] = self._example_color - with hf.element("font", attrib=style): - hf.write(text) - for sent in sentList: - hf.write(br()) - hf.write(sent) - - # TODO: break it down - def getEntryByElem( # noqa: PLR0912 - self, - entry: Element, - ) -> EntryType: - from lxml import etree as ET - - glos = self._glos - keywords: list[str] = [] - f = BytesIO() - translit = self._translitation - - def br() -> Element: - return ET.Element("br") - - with ET.htmlfile(f, encoding="utf-8") as hf: # 
noqa: PLR1702 - kebList: list[str] = [] - rebList: list[str] = [] - kebDisplayList: list[str] = [] - rebDisplayList: list[tuple[str, list[str]]] = [] - with hf.element("div"): - for k_ele in entry.findall("k_ele"): - keb = k_ele.find("keb") - if keb is None: - continue - if not keb.text: - continue - keb_text = keb.text - keb_text_norm = unicodedata.normalize("NFKC", keb_text) - keywords.append(keb_text_norm) - if keb_text != keb_text_norm: - keywords.append(keb_text) - kebList.append(keb_text) - keb_display = keb_text - if translit: - import romkan # type: ignore - - t_keb = romkan.to_roma(keb_text) - if t_keb and t_keb.isascii(): - keywords.append(t_keb) - keb_display += f" ({t_keb})" - kebDisplayList.append(keb_display) - # for elem in k_ele.findall("ke_pri"): - # log.info(elem.text) - - for r_ele in entry.findall("r_ele"): - reb = r_ele.find("reb") - if reb is None: - continue - if not reb.text: - continue - props: list[str] = [] - if r_ele.find("re_nokanji") is not None: - props.append("no kanji") - inf = r_ele.find("re_inf") - if inf is not None and inf.text: - props.append( - self.re_inf_mapping.get(inf.text, inf.text), - ) - keywords.append(reb.text) - reb_text = reb.text - rebList.append(reb_text) - reb_display = reb_text - if translit: - import romkan - - t_reb = romkan.to_roma(reb.text) - if t_reb and t_reb.isascii(): - keywords.append(t_reb) - reb_display += f" ({t_reb})" - rebDisplayList.append((reb_display, props)) - # for elem in r_ele.findall("re_pri"): - # log.info(elem.text) - - # this is for making internal links valid - # this makes too many alternates! - # but we don't seem to have a choice - # except for scanning and indexing all words once - # and then starting over and fixing/optimizing links - for s_keb in kebList: - for s_reb in rebList: - keywords.append(f"{s_keb}・{s_reb}") # noqa: PERF401 - - if kebDisplayList: - with hf.element(glos.titleTag(kebDisplayList[0])): - for i, s_keb in enumerate(kebDisplayList): - if i > 0: - with hf.element("font", color="red"): - hf.write(" | ") - hf.write(s_keb) - hf.write(br()) - - if rebDisplayList: - for i, (s_reb, props) in enumerate(rebDisplayList): - if i > 0: - with hf.element("font", color="red"): - hf.write(" | ") - with hf.element("font", color="green"): - hf.write(s_reb) - for prop in props: - hf.write(" ") - with hf.element("small"): - with hf.element("span", style=self.tagStyle): - hf.write(prop) - hf.write(br()) - - hf_ = cast("T_htmlfile", hf) - self.makeList( - hf_, - entry.findall("sense"), - self.writeSense, - ) - - defi = f.getvalue().decode("utf-8") - file = self._file - byteProgress = (file.tell(), self._fileSize) - return self._glos.newEntry( - keywords, - defi, - defiFormat="h", - byteProgress=byteProgress, - ) - - @staticmethod - def tostring(elem: Element) -> str: - from lxml import etree as ET - - return ( - ET.tostring( - elem, - method="html", - pretty_print=True, - ) - .decode("utf-8") - .strip() - ) - - def setCreationTime(self, header: str) -> None: - m = re.search("JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})", header) - if m is None: - return - self._glos.setInfo("creationTime", m.group(1)) - - def setMetadata(self, header: str) -> None: - # TODO: self.set_info("edition", ...) 
- self.setCreationTime(header) - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._wordCount = 0 - self._filename = "" - self._file: io.IOBase = nullBinaryIO - self._fileSize = 0 - self._link_number_postfix = re.compile("・[0-9]+$") - - def __len__(self) -> int: - return self._wordCount - - def close(self) -> None: - if self._file: - self._file.close() - self._file = nullBinaryIO - - def open( - self, - filename: str, - ) -> None: - try: - from lxml import etree as ET # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install lxml` to install") - raise - - self._filename = filename - self._fileSize = os.path.getsize(filename) - - self._glos.sourceLangName = "Japanese" - - self._glos.setDefaultDefiFormat("h") - self._glos.setInfo("definition_has_headwords", "True") - self._glos.setInfo("entry_url", "https://jisho.org/search/{word}") - # also good: f"https://sakuradict.com/search?q={{word}}" - - header = "" - with compressionOpen(filename, mode="rt", encoding="utf-8") as text_file: - text_file = cast("io.TextIOBase", text_file) - for line in text_file: - if "" in line: - break - header += line - self.setMetadata(header) - - self._file = compressionOpen(filename, mode="rb") - - def __iter__(self) -> Iterator[EntryType]: - from lxml import etree as ET - - context = ET.iterparse( # type: ignore # noqa: PGH003 - self._file, - events=("end",), - tag="entry", - ) - for _, _elem in context: - elem = cast("Element", _elem) - yield self.getEntryByElem(elem) - # clean up preceding siblings to save memory - # this reduces memory usage from ~64 MB to ~30 MB - parent = elem.getparent() - if parent is None: - continue - while elem.getprevious() is not None: - del parent[0] diff --git a/pyglossary/plugins/jmdict/reader.py b/pyglossary/plugins/jmdict/reader.py new file mode 100644 index 000000000..16de72ffc --- /dev/null +++ b/pyglossary/plugins/jmdict/reader.py @@ -0,0 +1,417 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import os +import re +import unicodedata +from io import BytesIO +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import io + from collections.abc import Callable, Iterator + + from pyglossary.glossary_types import ( + EntryType, + GlossaryType, + ) + from pyglossary.lxml_types import Element, T_htmlfile + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import exc_note, pip +from pyglossary.io_utils import nullBinaryIO + + +class Reader: + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + _example_padding: int = 10 + _example_color: str = "" + # _example_color: str = "#008FE1" + _translitation: bool = False + + tagStyle = ( + "color:white;" + "background:green;" + "padding-left:3px;" + "padding-right:3px;" + "border-radius:0.5ex;" + # 0.5ex ~= 0.3em, but "ex" is recommended + ) + + gikun_key = "gikun (meaning as reading) or jukujikun (special kanji reading)" + re_inf_mapping = { + gikun_key: "gikun/jukujikun", + "out-dated or obsolete kana usage": "obsolete", # outdated/obsolete + "word containing irregular kana usage": "irregular", + } + + @staticmethod + def makeList( + hf: T_htmlfile, + input_objects: list[Element], + processor: Callable, + single_prefix: str = "", + skip_single: bool = True, + ) -> None: + """Wrap elements into
                  if more than one element.""" + if not input_objects: + return + + if skip_single and len(input_objects) == 1: + hf.write(single_prefix) + processor(hf, input_objects[0]) + return + + with hf.element("ol"): + for el in input_objects: + with hf.element("li"): + processor(hf, el) + + # TODO: break it down + # PLR0912 Too many branches (23 > 12) + def writeSense( # noqa: PLR0912 + self, + hf: T_htmlfile, + sense: Element, + ) -> None: + from lxml import etree as ET + + def br() -> Element: + return ET.Element("br") + + for elem in sense.findall("pos"): + if not elem.text: + continue + desc = elem.text + if desc == "unclassified": + continue + with hf.element("i"): + hf.write(desc.capitalize()) + hf.write(br()) + + glossList = [elem.text.strip() for elem in sense.findall("gloss") if elem.text] + if glossList: + for i, gloss in enumerate(glossList): + if i > 0: + hf.write(", ") + hf.write(gloss) + hf.write(br()) + + relatedWords: list[str] = [] + for elem in sense.findall("xref"): + if not elem.text: + continue + word = elem.text.strip() + word = self._link_number_postfix.sub("", word) + relatedWords.append(word) + + if relatedWords: + hf.write("Related: ") + for i, word in enumerate(relatedWords): + if i > 0: + with hf.element("big"): + hf.write(" | ") + with hf.element("a", href=f"bword://{word}"): + hf.write(word) + hf.write(br()) + + antonymWords: list[str] = [] + for elem in sense.findall("ant"): + if not elem.text: + continue + word = elem.text.strip() + word = self._link_number_postfix.sub("", word) + antonymWords.append(word) + if antonymWords: + hf.write("Antonym: ") + for i, word in enumerate(antonymWords): + if i > 0: + with hf.element("big"): + hf.write(" | ") + with hf.element( + "a", + href=f"bword://{word}", + attrib={"class": "antonym"}, + ): + hf.write(word) + hf.write(br()) + + for i, elem in enumerate(sense.findall("field")): + if not elem.text: + continue + if i > 0: + hf.write(" ") + desc = elem.text + with hf.element("span", style=self.tagStyle): + hf.write(desc) + hf.write(br()) + + for i, elem in enumerate(sense.findall("misc")): + if not elem.text: + continue + if i > 0: + hf.write(" ") + desc = elem.text + with hf.element("small"): + with hf.element("span", style=self.tagStyle): + hf.write(desc) + hf.write(br()) + + examples = sense.findall("example") + # TODO: move to a method + if examples: # noqa: PLR1702 + with hf.element( + "div", + attrib={ + "class": "example", + "style": f"padding: {self._example_padding}px 0px;", + }, + ): + hf.write("Examples:") + with hf.element("ul"): + for i, elem in enumerate(examples): + if not elem.text: + continue + if i > 0: + hf.write(" ") + # one ex_srce (id?), one ex_text, and two ex_sent tags + textElem = elem.find("ex_text") + if textElem is None: + continue + if not textElem.text: + continue + text = textElem.text + sentList: list[str] = [] + for sentElem in elem.findall("ex_sent"): + if not sentElem.text: + continue + sentList.append(sentElem.text) + with hf.element("li"): + style: dict[str, str] = {} + if self._example_color: + style["color"] = self._example_color + with hf.element("font", attrib=style): + hf.write(text) + for sent in sentList: + hf.write(br()) + hf.write(sent) + + # TODO: break it down + def getEntryByElem( # noqa: PLR0912 + self, + entry: Element, + ) -> EntryType: + from lxml import etree as ET + + glos = self._glos + keywords: list[str] = [] + f = BytesIO() + translit = self._translitation + + def br() -> Element: + return ET.Element("br") + + with ET.htmlfile(f, encoding="utf-8") as hf: # 
noqa: PLR1702 + kebList: list[str] = [] + rebList: list[str] = [] + kebDisplayList: list[str] = [] + rebDisplayList: list[tuple[str, list[str]]] = [] + with hf.element("div"): + for k_ele in entry.findall("k_ele"): + keb = k_ele.find("keb") + if keb is None: + continue + if not keb.text: + continue + keb_text = keb.text + keb_text_norm = unicodedata.normalize("NFKC", keb_text) + keywords.append(keb_text_norm) + if keb_text != keb_text_norm: + keywords.append(keb_text) + kebList.append(keb_text) + keb_display = keb_text + if translit: + import romkan # type: ignore + + t_keb = romkan.to_roma(keb_text) + if t_keb and t_keb.isascii(): + keywords.append(t_keb) + keb_display += f" ({t_keb})" + kebDisplayList.append(keb_display) + # for elem in k_ele.findall("ke_pri"): + # log.info(elem.text) + + for r_ele in entry.findall("r_ele"): + reb = r_ele.find("reb") + if reb is None: + continue + if not reb.text: + continue + props: list[str] = [] + if r_ele.find("re_nokanji") is not None: + props.append("no kanji") + inf = r_ele.find("re_inf") + if inf is not None and inf.text: + props.append( + self.re_inf_mapping.get(inf.text, inf.text), + ) + keywords.append(reb.text) + reb_text = reb.text + rebList.append(reb_text) + reb_display = reb_text + if translit: + import romkan + + t_reb = romkan.to_roma(reb.text) + if t_reb and t_reb.isascii(): + keywords.append(t_reb) + reb_display += f" ({t_reb})" + rebDisplayList.append((reb_display, props)) + # for elem in r_ele.findall("re_pri"): + # log.info(elem.text) + + # this is for making internal links valid + # this makes too many alternates! + # but we don't seem to have a choice + # except for scanning and indexing all words once + # and then starting over and fixing/optimizing links + for s_keb in kebList: + for s_reb in rebList: + keywords.append(f"{s_keb}・{s_reb}") # noqa: PERF401 + + if kebDisplayList: + with hf.element(glos.titleTag(kebDisplayList[0])): + for i, s_keb in enumerate(kebDisplayList): + if i > 0: + with hf.element("font", color="red"): + hf.write(" | ") + hf.write(s_keb) + hf.write(br()) + + if rebDisplayList: + for i, (s_reb, props) in enumerate(rebDisplayList): + if i > 0: + with hf.element("font", color="red"): + hf.write(" | ") + with hf.element("font", color="green"): + hf.write(s_reb) + for prop in props: + hf.write(" ") + with hf.element("small"): + with hf.element("span", style=self.tagStyle): + hf.write(prop) + hf.write(br()) + + hf_ = cast("T_htmlfile", hf) + self.makeList( + hf_, + entry.findall("sense"), + self.writeSense, + ) + + defi = f.getvalue().decode("utf-8") + file = self._file + byteProgress = (file.tell(), self._fileSize) + return self._glos.newEntry( + keywords, + defi, + defiFormat="h", + byteProgress=byteProgress, + ) + + @staticmethod + def tostring(elem: Element) -> str: + from lxml import etree as ET + + return ( + ET.tostring( + elem, + method="html", + pretty_print=True, + ) + .decode("utf-8") + .strip() + ) + + def setCreationTime(self, header: str) -> None: + m = re.search("JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})", header) + if m is None: + return + self._glos.setInfo("creationTime", m.group(1)) + + def setMetadata(self, header: str) -> None: + # TODO: self.set_info("edition", ...) 
+ self.setCreationTime(header) + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._wordCount = 0 + self._filename = "" + self._file: io.IOBase = nullBinaryIO + self._fileSize = 0 + self._link_number_postfix = re.compile("・[0-9]+$") + + def __len__(self) -> int: + return self._wordCount + + def close(self) -> None: + if self._file: + self._file.close() + self._file = nullBinaryIO + + def open( + self, + filename: str, + ) -> None: + try: + from lxml import etree as ET # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install lxml` to install") + raise + + self._filename = filename + self._fileSize = os.path.getsize(filename) + + self._glos.sourceLangName = "Japanese" + + self._glos.setDefaultDefiFormat("h") + self._glos.setInfo("definition_has_headwords", "True") + self._glos.setInfo("entry_url", "https://jisho.org/search/{word}") + # also good: f"https://sakuradict.com/search?q={{word}}" + + header = "" + with compressionOpen(filename, mode="rt", encoding="utf-8") as text_file: + text_file = cast("io.TextIOBase", text_file) + for line in text_file: + if "" in line: + break + header += line + self.setMetadata(header) + + self._file = compressionOpen(filename, mode="rb") + + def __iter__(self) -> Iterator[EntryType]: + from lxml import etree as ET + + context = ET.iterparse( # type: ignore # noqa: PGH003 + self._file, + events=("end",), + tag="entry", + ) + for _, _elem in context: + elem = cast("Element", _elem) + yield self.getEntryByElem(elem) + # clean up preceding siblings to save memory + # this reduces memory usage from ~64 MB to ~30 MB + parent = elem.getparent() + if parent is None: + continue + while elem.getprevious() is not None: + del parent[0] diff --git a/pyglossary/plugins/jmnedict/__init__.py b/pyglossary/plugins/jmnedict/__init__.py index 59582d936..ba4213465 100644 --- a/pyglossary/plugins/jmnedict/__init__.py +++ b/pyglossary/plugins/jmnedict/__init__.py @@ -1,28 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import os -import re -from io import BytesIO -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING -if TYPE_CHECKING: - import io - from collections.abc import Callable, Iterator +from .reader import Reader - from pyglossary.glossary_types import ( - EntryType, - GlossaryType, - ) - from pyglossary.lxml_types import Element, T_htmlfile +if TYPE_CHECKING: from pyglossary.option import Option -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import exc_note, pip -from pyglossary.io_utils import nullBinaryIO __all__ = [ "Reader", @@ -53,277 +38,3 @@ "EDRDG Wiki", ) optionsProp: dict[str, Option] = {} - - -class Reader: - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - tagStyle = ( - "color:white;" - "background:green;" - "padding-left:3px;" - "padding-right:3px;" - "border-radius:0.5ex;" - # 0.5ex ~= 0.3em, but "ex" is recommended - ) - - gikun_key = "gikun (meaning as reading) or jukujikun (special kanji reading)" - re_inf_mapping = { - gikun_key: "gikun/jukujikun", - "out-dated or obsolete kana usage": "obsolete", # outdated/obsolete - "word containing irregular kana usage": "irregular", - } - - @staticmethod - def makeList( - hf: T_htmlfile, - input_objects: list[Element], - processor: Callable, - single_prefix: str = "", - skip_single: bool = True, - ) -> None: - """Wrap elements into
                    if more than one element.""" - if not input_objects: - return - - if skip_single and len(input_objects) == 1: - hf.write(single_prefix) - processor(hf, input_objects[0]) - return - - with hf.element("ol"): - for el in input_objects: - with hf.element("li"): - processor(hf, el) - - def writeTrans( - self, - hf: T_htmlfile, - trans: Element, - ) -> None: - from lxml import etree as ET - - def br() -> Element: - return ET.Element("br") - - for elem in trans.findall("name_type"): - if not elem.text: - continue - desc = elem.text - with hf.element("i"): - hf.write(desc.capitalize()) - hf.write(br()) - - for elem in trans.findall("trans_det"): - if not elem.text: - continue - desc = elem.text - hf.write(desc) - hf.write(br()) - - relatedWords: list[str] = [] - for elem in trans.findall("xref"): - if not elem.text: - continue - word = elem.text.strip() - word = self._link_number_postfix.sub("", word) - relatedWords.append(word) - - if relatedWords: - hf.write("Related: ") - for i, word in enumerate(relatedWords): - if i > 0: - with hf.element("big"): - hf.write(" | ") - with hf.element("a", href=f"bword://{word}"): - hf.write(word) - hf.write(br()) - - def getEntryByElem( # noqa: PLR0912 - self, - entry: Element, - ) -> EntryType: - from lxml import etree as ET - - glos = self._glos - keywords: list[str] = [] - f = BytesIO() - - def br() -> Element: - return ET.Element("br") - - with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 - kebList: list[str] = [] - rebList: list[tuple[str, list[str]]] = [] - with hf.element("div"): - for k_ele in entry.findall("k_ele"): - keb = k_ele.find("keb") - if keb is None: - continue - if not keb.text: - continue - kebList.append(keb.text) - keywords.append(keb.text) - # for elem in k_ele.findall("ke_pri"): - # log.info(elem.text) - - for r_ele in entry.findall("r_ele"): - reb = r_ele.find("reb") - if reb is None: - continue - if not reb.text: - continue - props: list[str] = [] - if r_ele.find("re_nokanji") is not None: - props.append("no kanji") - inf = r_ele.find("re_inf") - if inf is not None and inf.text: - props.append( - self.re_inf_mapping.get(inf.text, inf.text), - ) - rebList.append((reb.text, props)) - keywords.append(reb.text) - # for elem in r_ele.findall("re_pri"): - # log.info(elem.text) - - # this is for making internal links valid - # this makes too many alternates! 
- # but we don't seem to have a choice - # except for scanning and indexing all words once - # and then starting over and fixing/optimizing links - for s_keb in kebList: - for s_reb, _ in rebList: - keywords.append(f"{s_keb}・{s_reb}") - - if kebList: - with hf.element(glos.titleTag(kebList[0])): - for i, s_keb in enumerate(kebList): - if i > 0: - with hf.element("font", color="red"): - hf.write(" | ") - hf.write(s_keb) - hf.write(br()) - - if rebList: - for i, (s_reb, props) in enumerate(rebList): - if i > 0: - with hf.element("font", color="red"): - hf.write(" | ") - with hf.element("font", color="green"): - hf.write(s_reb) - for prop in props: - hf.write(" ") - with hf.element("small"): - with hf.element("span", style=self.tagStyle): - hf.write(prop) - hf.write(br()) - - hf_ = cast("T_htmlfile", hf) - self.makeList( - hf_, - entry.findall("trans"), - self.writeTrans, - ) - - defi = f.getvalue().decode("utf-8") - file = self._file - byteProgress = (file.tell(), self._fileSize) - return self._glos.newEntry( - keywords, - defi, - defiFormat="h", - byteProgress=byteProgress, - ) - - @staticmethod - def tostring(elem: Element) -> str: - from lxml import etree as ET - - return ( - ET.tostring( - elem, - method="html", - pretty_print=True, - ) - .decode("utf-8") - .strip() - ) - - def setCreationTime(self, header: str) -> None: - m = re.search("JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})", header) - if m is None: - return - self._glos.setInfo("creationTime", m.group(1)) - - def setMetadata(self, header: str) -> None: - # TODO: self.set_info("edition", ...) - self.setCreationTime(header) - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._wordCount = 0 - self._filename = "" - self._file: io.IOBase = nullBinaryIO - self._fileSize = 0 - self._link_number_postfix = re.compile("・[0-9]+$") - - def __len__(self) -> int: - return self._wordCount - - def close(self) -> None: - if self._file: - self._file.close() - self._file = nullBinaryIO - - def open( - self, - filename: str, - ) -> None: - try: - from lxml import etree as ET # noqa: F401 - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install lxml` to install") - raise - - self._filename = filename - self._fileSize = os.path.getsize(filename) - - self._glos.sourceLangName = "Japanese" - - self._glos.setDefaultDefiFormat("h") - self._glos.setInfo("definition_has_headwords", "True") - self._glos.setInfo("entry_url", "https://jisho.org/search/{word}") - # also good: f"https://sakuradict.com/search?q={{word}}" - - header = "" - with compressionOpen(filename, mode="rt", encoding="utf-8") as text_file: - text_file = cast("io.TextIOBase", text_file) - for line in text_file: - if "" in line: - break - header += line - self.setMetadata(header) - - self._file = compressionOpen(filename, mode="rb") - - def __iter__(self) -> Iterator[EntryType]: - from lxml import etree as ET - - context = ET.iterparse( # type: ignore # noqa: PGH003 - self._file, - events=("end",), - tag="entry", - ) - for _, _elem in context: - elem = cast("Element", _elem) - yield self.getEntryByElem(elem) - # clean up preceding siblings to save memory - # this reduces memory usage from ~64 MB to ~30 MB - parent = elem.getparent() - if parent is None: - continue - while elem.getprevious() is not None: - del parent[0] diff --git a/pyglossary/plugins/jmnedict/reader.py b/pyglossary/plugins/jmnedict/reader.py new file mode 100644 index 000000000..8d25b8ce1 --- /dev/null +++ b/pyglossary/plugins/jmnedict/reader.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 
-*- +from __future__ import annotations + +import os +import re +from io import BytesIO +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import io + from collections.abc import Callable, Iterator + + from pyglossary.glossary_types import ( + EntryType, + GlossaryType, + ) + from pyglossary.lxml_types import Element, T_htmlfile + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import exc_note, pip +from pyglossary.io_utils import nullBinaryIO + + +class Reader: + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + tagStyle = ( + "color:white;" + "background:green;" + "padding-left:3px;" + "padding-right:3px;" + "border-radius:0.5ex;" + # 0.5ex ~= 0.3em, but "ex" is recommended + ) + + gikun_key = "gikun (meaning as reading) or jukujikun (special kanji reading)" + re_inf_mapping = { + gikun_key: "gikun/jukujikun", + "out-dated or obsolete kana usage": "obsolete", # outdated/obsolete + "word containing irregular kana usage": "irregular", + } + + @staticmethod + def makeList( + hf: T_htmlfile, + input_objects: list[Element], + processor: Callable, + single_prefix: str = "", + skip_single: bool = True, + ) -> None: + """Wrap elements into
                      if more than one element.""" + if not input_objects: + return + + if skip_single and len(input_objects) == 1: + hf.write(single_prefix) + processor(hf, input_objects[0]) + return + + with hf.element("ol"): + for el in input_objects: + with hf.element("li"): + processor(hf, el) + + def writeTrans( + self, + hf: T_htmlfile, + trans: Element, + ) -> None: + from lxml import etree as ET + + def br() -> Element: + return ET.Element("br") + + for elem in trans.findall("name_type"): + if not elem.text: + continue + desc = elem.text + with hf.element("i"): + hf.write(desc.capitalize()) + hf.write(br()) + + for elem in trans.findall("trans_det"): + if not elem.text: + continue + desc = elem.text + hf.write(desc) + hf.write(br()) + + relatedWords: list[str] = [] + for elem in trans.findall("xref"): + if not elem.text: + continue + word = elem.text.strip() + word = self._link_number_postfix.sub("", word) + relatedWords.append(word) + + if relatedWords: + hf.write("Related: ") + for i, word in enumerate(relatedWords): + if i > 0: + with hf.element("big"): + hf.write(" | ") + with hf.element("a", href=f"bword://{word}"): + hf.write(word) + hf.write(br()) + + def getEntryByElem( # noqa: PLR0912 + self, + entry: Element, + ) -> EntryType: + from lxml import etree as ET + + glos = self._glos + keywords: list[str] = [] + f = BytesIO() + + def br() -> Element: + return ET.Element("br") + + with ET.htmlfile(f, encoding="utf-8") as hf: # noqa: PLR1702 + kebList: list[str] = [] + rebList: list[tuple[str, list[str]]] = [] + with hf.element("div"): + for k_ele in entry.findall("k_ele"): + keb = k_ele.find("keb") + if keb is None: + continue + if not keb.text: + continue + kebList.append(keb.text) + keywords.append(keb.text) + # for elem in k_ele.findall("ke_pri"): + # log.info(elem.text) + + for r_ele in entry.findall("r_ele"): + reb = r_ele.find("reb") + if reb is None: + continue + if not reb.text: + continue + props: list[str] = [] + if r_ele.find("re_nokanji") is not None: + props.append("no kanji") + inf = r_ele.find("re_inf") + if inf is not None and inf.text: + props.append( + self.re_inf_mapping.get(inf.text, inf.text), + ) + rebList.append((reb.text, props)) + keywords.append(reb.text) + # for elem in r_ele.findall("re_pri"): + # log.info(elem.text) + + # this is for making internal links valid + # this makes too many alternates! 
+ # but we don't seem to have a choice + # except for scanning and indexing all words once + # and then starting over and fixing/optimizing links + for s_keb in kebList: + for s_reb, _ in rebList: + keywords.append(f"{s_keb}・{s_reb}") + + if kebList: + with hf.element(glos.titleTag(kebList[0])): + for i, s_keb in enumerate(kebList): + if i > 0: + with hf.element("font", color="red"): + hf.write(" | ") + hf.write(s_keb) + hf.write(br()) + + if rebList: + for i, (s_reb, props) in enumerate(rebList): + if i > 0: + with hf.element("font", color="red"): + hf.write(" | ") + with hf.element("font", color="green"): + hf.write(s_reb) + for prop in props: + hf.write(" ") + with hf.element("small"): + with hf.element("span", style=self.tagStyle): + hf.write(prop) + hf.write(br()) + + hf_ = cast("T_htmlfile", hf) + self.makeList( + hf_, + entry.findall("trans"), + self.writeTrans, + ) + + defi = f.getvalue().decode("utf-8") + file = self._file + byteProgress = (file.tell(), self._fileSize) + return self._glos.newEntry( + keywords, + defi, + defiFormat="h", + byteProgress=byteProgress, + ) + + @staticmethod + def tostring(elem: Element) -> str: + from lxml import etree as ET + + return ( + ET.tostring( + elem, + method="html", + pretty_print=True, + ) + .decode("utf-8") + .strip() + ) + + def setCreationTime(self, header: str) -> None: + m = re.search("JMdict created: ([0-9]{4}-[0-9]{2}-[0-9]{2})", header) + if m is None: + return + self._glos.setInfo("creationTime", m.group(1)) + + def setMetadata(self, header: str) -> None: + # TODO: self.set_info("edition", ...) + self.setCreationTime(header) + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._wordCount = 0 + self._filename = "" + self._file: io.IOBase = nullBinaryIO + self._fileSize = 0 + self._link_number_postfix = re.compile("・[0-9]+$") + + def __len__(self) -> int: + return self._wordCount + + def close(self) -> None: + if self._file: + self._file.close() + self._file = nullBinaryIO + + def open( + self, + filename: str, + ) -> None: + try: + from lxml import etree as ET # noqa: F401 + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install lxml` to install") + raise + + self._filename = filename + self._fileSize = os.path.getsize(filename) + + self._glos.sourceLangName = "Japanese" + + self._glos.setDefaultDefiFormat("h") + self._glos.setInfo("definition_has_headwords", "True") + self._glos.setInfo("entry_url", "https://jisho.org/search/{word}") + # also good: f"https://sakuradict.com/search?q={{word}}" + + header = "" + with compressionOpen(filename, mode="rt", encoding="utf-8") as text_file: + text_file = cast("io.TextIOBase", text_file) + for line in text_file: + if "" in line: + break + header += line + self.setMetadata(header) + + self._file = compressionOpen(filename, mode="rb") + + def __iter__(self) -> Iterator[EntryType]: + from lxml import etree as ET + + context = ET.iterparse( # type: ignore # noqa: PGH003 + self._file, + events=("end",), + tag="entry", + ) + for _, _elem in context: + elem = cast("Element", _elem) + yield self.getEntryByElem(elem) + # clean up preceding siblings to save memory + # this reduces memory usage from ~64 MB to ~30 MB + parent = elem.getparent() + if parent is None: + continue + while elem.getprevious() is not None: + del parent[0] diff --git a/pyglossary/plugins/json_plugin/__init__.py b/pyglossary/plugins/json_plugin/__init__.py index 83fdbbb10..a21b50f69 100644 --- a/pyglossary/plugins/json_plugin/__init__.py +++ b/pyglossary/plugins/json_plugin/__init__.py @@ 
-2,25 +2,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -from pyglossary.compression import ( - # compressionOpen, - stdCompressions, -) from pyglossary.option import ( BoolOption, EncodingOption, Option, ) -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import ( - EntryType, - GlossaryType, - ) +from .writer import Writer __all__ = [ "Writer", @@ -58,53 +46,3 @@ comment="add headwords title to beginning of definition", ), } - - -class Writer: - _encoding: str = "utf-8" - _enable_info: bool = True - _resources: bool = True - _word_title: bool = False - - compressions = stdCompressions - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - glos.preventDuplicateWords() - - def open(self, filename: str) -> None: - self._filename = filename - - def finish(self) -> None: - self._filename = "" - - def write(self) -> Generator[None, EntryType, None]: - from json import dumps - - from pyglossary.text_writer import writeTxt - - glos = self._glos - encoding = self._encoding - enable_info = self._enable_info - resources = self._resources - - ensure_ascii = encoding == "ascii" - - def escape(st: str) -> str: - return dumps(st, ensure_ascii=ensure_ascii) - - yield from writeTxt( - glos, - entryFmt="\t{word}: {defi},\n", - filename=self._filename, - encoding=encoding, - writeInfo=enable_info, - wordEscapeFunc=escape, - defiEscapeFunc=escape, - ext=".json", - head="{\n", - tail='\t"": ""\n}', - resources=resources, - word_title=self._word_title, - ) diff --git a/pyglossary/plugins/json_plugin/writer.py b/pyglossary/plugins/json_plugin/writer.py new file mode 100644 index 000000000..f43b88a7e --- /dev/null +++ b/pyglossary/plugins/json_plugin/writer.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyglossary.compression import ( + # compressionOpen, + stdCompressions, +) + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import ( + EntryType, + GlossaryType, + ) + + +class Writer: + _encoding: str = "utf-8" + _enable_info: bool = True + _resources: bool = True + _word_title: bool = False + + compressions = stdCompressions + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + glos.preventDuplicateWords() + + def open(self, filename: str) -> None: + self._filename = filename + + def finish(self) -> None: + self._filename = "" + + def write(self) -> Generator[None, EntryType, None]: + from json import dumps + + from pyglossary.text_writer import writeTxt + + glos = self._glos + encoding = self._encoding + enable_info = self._enable_info + resources = self._resources + + ensure_ascii = encoding == "ascii" + + def escape(st: str) -> str: + return dumps(st, ensure_ascii=ensure_ascii) + + yield from writeTxt( + glos, + entryFmt="\t{word}: {defi},\n", + filename=self._filename, + encoding=encoding, + writeInfo=enable_info, + wordEscapeFunc=escape, + defiEscapeFunc=escape, + ext=".json", + head="{\n", + tail='\t"": ""\n}', + resources=resources, + word_title=self._word_title, + ) diff --git a/pyglossary/plugins/lingoes_ldf/__init__.py b/pyglossary/plugins/lingoes_ldf/__init__.py index 41f9c3269..e63e43e93 100644 --- a/pyglossary/plugins/lingoes_ldf/__init__.py +++ b/pyglossary/plugins/lingoes_ldf/__init__.py @@ -1,27 +1,15 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from typing import TYPE_CHECKING - -from 
pyglossary.compression import ( - # compressionOpen, - stdCompressions, -) -from pyglossary.core import log -from pyglossary.file_utils import fileCountLines from pyglossary.option import ( BoolOption, EncodingOption, NewlineOption, Option, ) -from pyglossary.text_reader import TextGlossaryReader, nextBlockResultType -from pyglossary.text_utils import splitByBar -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader +from .writer import Writer __all__ = [ "Reader", @@ -57,121 +45,3 @@ "resources": BoolOption(comment="Enable resources / data files"), "encoding": EncodingOption(), } - - -class Reader(TextGlossaryReader): - compressions = stdCompressions - - def __len__(self) -> int: - if self._wordCount is None: - log.debug("Try not to use len(reader) as it takes extra time") - self._wordCount = ( - fileCountLines( - self._filename, - newline=b"\n\n", - ) - - self._leadingLinesCount - ) - return self._wordCount - - @classmethod - def isInfoWord(cls, word: str) -> bool: - if isinstance(word, str): - return word.startswith("#") - - return False - - @classmethod - def fixInfoWord(cls, word: str) -> str: - if isinstance(word, str): - return word.lstrip("#").lower() - - return word - - def nextBlock(self) -> nextBlockResultType: - if not self._file: - raise StopIteration - entryLines: list[str] = [] - while True: - line = self.readline() - if not line: - raise StopIteration - line = line.rstrip("\n\r") # FIXME - if line.startswith("###"): - parts = line.split(":") - key = parts[0].strip() - value = ":".join(parts[1:]).strip() - return key, value, None - - if line: - entryLines.append(line) - continue - - # now `line` is empty, process `entryLines` - if not entryLines: - return None - if len(entryLines) < 2: - log.error( - f"invalid block near pos {self._file.tell()}" - f" in file {self._filename}", - ) - return None - word = entryLines[0] - defi = "\n".join(entryLines[1:]) - defi = defi.replace("
                      ", "\n") # FIXME - - words = splitByBar(word) - - return words, defi, None - - -class Writer: - compressions = stdCompressions - - _newline: str = "\n" - _resources: bool = True - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - - def getInfo(self, key: str) -> str: - return self._glos.getInfo(key).replace("\n", "
                      ") - - def getAuthor(self) -> str: - return self._glos.author.replace("\n", "
                      ") - - def finish(self) -> None: - self._filename = "" - - def open(self, filename: str) -> None: - self._filename = filename - - @staticmethod - def _defiEscapeFunc(defi: str) -> str: - return defi.replace("\n", "
                      ") - - def write(self) -> Generator[None, EntryType, None]: - from pyglossary.text_writer import writeTxt - - newline = self._newline - resources = self._resources - head = ( - f"###Title: {self.getInfo('title')}\n" - f"###Description: {self.getInfo('description')}\n" - f"###Author: {self.getAuthor()}\n" - f"###Email: {self.getInfo('email')}\n" - f"###Website: {self.getInfo('website')}\n" - f"###Copyright: {self.getInfo('copyright')}\n" - ) - yield from writeTxt( - self._glos, - entryFmt="{word}\n{defi}\n\n", - filename=self._filename, - writeInfo=False, - defiEscapeFunc=self._defiEscapeFunc, - ext=".ldf", - head=head, - newline=newline, - resources=resources, - ) diff --git a/pyglossary/plugins/lingoes_ldf/reader.py b/pyglossary/plugins/lingoes_ldf/reader.py new file mode 100644 index 000000000..211056bfe --- /dev/null +++ b/pyglossary/plugins/lingoes_ldf/reader.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from pyglossary.compression import ( + # compressionOpen, + stdCompressions, +) +from pyglossary.core import log +from pyglossary.file_utils import fileCountLines +from pyglossary.text_reader import TextGlossaryReader, nextBlockResultType +from pyglossary.text_utils import splitByBar + + +class Reader(TextGlossaryReader): + compressions = stdCompressions + + def __len__(self) -> int: + if self._wordCount is None: + log.debug("Try not to use len(reader) as it takes extra time") + self._wordCount = ( + fileCountLines( + self._filename, + newline=b"\n\n", + ) + - self._leadingLinesCount + ) + return self._wordCount + + @classmethod + def isInfoWord(cls, word: str) -> bool: + if isinstance(word, str): + return word.startswith("#") + + return False + + @classmethod + def fixInfoWord(cls, word: str) -> str: + if isinstance(word, str): + return word.lstrip("#").lower() + + return word + + def nextBlock(self) -> nextBlockResultType: + if not self._file: + raise StopIteration + entryLines: list[str] = [] + while True: + line = self.readline() + if not line: + raise StopIteration + line = line.rstrip("\n\r") # FIXME + if line.startswith("###"): + parts = line.split(":") + key = parts[0].strip() + value = ":".join(parts[1:]).strip() + return key, value, None + + if line: + entryLines.append(line) + continue + + # now `line` is empty, process `entryLines` + if not entryLines: + return None + if len(entryLines) < 2: + log.error( + f"invalid block near pos {self._file.tell()}" + f" in file {self._filename}", + ) + return None + word = entryLines[0] + defi = "\n".join(entryLines[1:]) + defi = defi.replace("
                      ", "\n") # FIXME + + words = splitByBar(word) + + return words, defi, None diff --git a/pyglossary/plugins/lingoes_ldf/writer.py b/pyglossary/plugins/lingoes_ldf/writer.py new file mode 100644 index 000000000..93004246f --- /dev/null +++ b/pyglossary/plugins/lingoes_ldf/writer.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyglossary.compression import ( + # compressionOpen, + stdCompressions, +) + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + compressions = stdCompressions + + _newline: str = "\n" + _resources: bool = True + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + + def getInfo(self, key: str) -> str: + return self._glos.getInfo(key).replace("\n", "
                      ") + + def getAuthor(self) -> str: + return self._glos.author.replace("\n", "
                      ") + + def finish(self) -> None: + self._filename = "" + + def open(self, filename: str) -> None: + self._filename = filename + + @staticmethod + def _defiEscapeFunc(defi: str) -> str: + return defi.replace("\n", "
                      ") + + def write(self) -> Generator[None, EntryType, None]: + from pyglossary.text_writer import writeTxt + + newline = self._newline + resources = self._resources + head = ( + f"###Title: {self.getInfo('title')}\n" + f"###Description: {self.getInfo('description')}\n" + f"###Author: {self.getAuthor()}\n" + f"###Email: {self.getInfo('email')}\n" + f"###Website: {self.getInfo('website')}\n" + f"###Copyright: {self.getInfo('copyright')}\n" + ) + yield from writeTxt( + self._glos, + entryFmt="{word}\n{defi}\n\n", + filename=self._filename, + writeInfo=False, + defiEscapeFunc=self._defiEscapeFunc, + ext=".ldf", + head=head, + newline=newline, + resources=resources, + ) diff --git a/pyglossary/plugins/makindo_medical/__init__.py b/pyglossary/plugins/makindo_medical/__init__.py index 2e2f5f579..07f783113 100644 --- a/pyglossary/plugins/makindo_medical/__init__.py +++ b/pyglossary/plugins/makindo_medical/__init__.py @@ -1,14 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import html from typing import TYPE_CHECKING -if TYPE_CHECKING: - import sqlite3 - from collections.abc import Iterator +from .reader import Reader - from pyglossary.glossary_types import EntryType, GlossaryType +if TYPE_CHECKING: from pyglossary.option import Option __all__ = [ @@ -40,50 +37,3 @@ "Makindo.co.uk Comprehensive Medical Encyclopedia", ) optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - - def _clear(self) -> None: - self._filename = "" - self._con: sqlite3.Connection | None = None - self._cur: sqlite3.Cursor | None = None - - def open(self, filename: str) -> None: - from sqlite3 import connect - - self._filename = filename - self._con = connect(filename) - self._cur = self._con.cursor() - self._glos.setDefaultDefiFormat("h") - - def __len__(self) -> int: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute("select count(*) from NEW_TABLE") - return self._cur.fetchone()[0] - - def __iter__(self) -> Iterator[EntryType]: - if self._cur is None: - raise ValueError("cur is None") - self._cur.execute( - "select _id, contents from NEW_TABLE where _id is not null", - ) - # FIXME: iteration over self._cur stops after one entry - # and self._cur.fetchone() returns None - # for row in self._cur: - for row in self._cur.fetchall(): - word = html.unescape(row[0]) - definition = row[1].decode("utf-8", errors="ignore") - # print(f"{word!r}, {definition!r}") - yield self._glos.newEntry(word, definition, defiFormat="h") - - def close(self) -> None: - if self._cur: - self._cur.close() - if self._con: - self._con.close() - self._clear() diff --git a/pyglossary/plugins/makindo_medical/reader.py b/pyglossary/plugins/makindo_medical/reader.py new file mode 100644 index 000000000..14bb29cd2 --- /dev/null +++ b/pyglossary/plugins/makindo_medical/reader.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import html +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import sqlite3 + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + + def _clear(self) -> None: + self._filename = "" + self._con: sqlite3.Connection | None = None + self._cur: sqlite3.Cursor | None = None + + def open(self, filename: str) -> None: + from sqlite3 import connect + + self._filename = filename + self._con = 
connect(filename) + self._cur = self._con.cursor() + self._glos.setDefaultDefiFormat("h") + + def __len__(self) -> int: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute("select count(*) from NEW_TABLE") + return self._cur.fetchone()[0] + + def __iter__(self) -> Iterator[EntryType]: + if self._cur is None: + raise ValueError("cur is None") + self._cur.execute( + "select _id, contents from NEW_TABLE where _id is not null", + ) + # FIXME: iteration over self._cur stops after one entry + # and self._cur.fetchone() returns None + # for row in self._cur: + for row in self._cur.fetchall(): + word = html.unescape(row[0]) + definition = row[1].decode("utf-8", errors="ignore") + # print(f"{word!r}, {definition!r}") + yield self._glos.newEntry(word, definition, defiFormat="h") + + def close(self) -> None: + if self._cur: + self._cur.close() + if self._con: + self._con.close() + self._clear() diff --git a/pyglossary/plugins/octopus_mdict_new/__init__.py b/pyglossary/plugins/octopus_mdict_new/__init__.py index 244609819..bdd3aa239 100644 --- a/pyglossary/plugins/octopus_mdict_new/__init__.py +++ b/pyglossary/plugins/octopus_mdict_new/__init__.py @@ -1,43 +1,13 @@ # -*- coding: utf-8 -*- -# Read Octopus MDict dictionary format, mdx(dictionary)/mdd(data) -# -# Copyright © 2013 Xiaoqiang Wang -# Copyright © 2013-2021 Saeed Rasooli -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. from __future__ import annotations -import gc -import os -import re -import sys -from os.path import dirname, extsep, isfile, join, splitext -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.plugin_lib.readmdict import MDD, MDX - - -from pyglossary.core import log from pyglossary.option import ( BoolOption, EncodingOption, Option, ) -from pyglossary.text_utils import toStr + +from .reader import Reader __all__ = [ "Reader", @@ -87,189 +57,3 @@ then try to install [LZO library and Python binding](./doc/lzo.md).""", ), ] - - -class Reader: - _encoding: str = "" - _substyle: bool = True - _same_dir_data_files: bool = False - _audio: bool = False - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self.clear() - self._re_internal_link = re.compile("href=([\"'])(entry://|[dx]:)") - self._re_audio_link = re.compile( - ']*? 
)?href="sound://([^<>"]+)"( .*?)?>(.*?)', - ) - - def clear(self) -> None: - self._filename = "" - self._mdx: MDX | None = None - self._mdd: list[MDD] = [] - self._wordCount = 0 - self._dataEntryCount = 0 - - # dict of mainWord -> newline-separated alternatives - self._linksDict: dict[str, str] = {} - - def open(self, filename: str) -> None: - from pyglossary.plugin_lib.readmdict import MDD, MDX - - self._filename = filename - self._mdx = MDX(filename, self._encoding, self._substyle) - - """ - multiple MDD files are supported with this naming schema: - FILE.mdx - FILE.mdd - FILE.1.mdd - FILE.2.mdd - FILE.3.mdd - """ - - filenameNoExt, _ext = splitext(self._filename) - mddBase = filenameNoExt + extsep - for fname in (f"{mddBase}mdd", f"{mddBase}1.mdd"): - if isfile(fname): - self._mdd.append(MDD(fname)) - mddN = 2 - while isfile(f"{mddBase}{mddN}.mdd"): - self._mdd.append(MDD(f"{mddBase}{mddN}.mdd")) - mddN += 1 - - dataEntryCount = 0 - for mdd in self._mdd: - dataEntryCount += len(mdd) - self._dataEntryCount = dataEntryCount - log.info(f"Found {len(self._mdd)} mdd files with {dataEntryCount} entries") - - # from pprint import pformat - # log.debug("mdx.header = " + pformat(self._mdx.header)) - # for key, value in self._mdx.header.items(): - # key = key.lower() - # self._glos.setInfo(key, value) - try: - title = toStr(self._mdx.header[b"Title"]) - except KeyError: - pass - else: - title = title.strip() - if title == "Title (No HTML code allowed)": - # TODO: how to avoid this? - title = "" - if title: - self._glos.setInfo("name", title) - desc = toStr(self._mdx.header.get(b"Description", "")) - if desc: - self._glos.setInfo("description", desc) - - self.loadLinks() - - def loadLinks(self) -> None: - from pyglossary.plugin_lib.readmdict import MDX - - mdx = self._mdx - if mdx is None: - raise ValueError("mdx is None") - - log.info("extracting links...") - linksDict: dict[str, str] = {} - word = "" - wordCount = 0 - for b_word, b_defi in mdx.items(): - word = b_word.decode("utf-8") - defi = b_defi.decode("utf-8").strip() - if defi.startswith("@@@LINK="): - if not word: - log.warning(f"unexpected defi: {defi}") - continue - mainWord = defi[8:] - if mainWord in linksDict: - linksDict[mainWord] += "\n" + word - else: - linksDict[mainWord] = word - continue - wordCount += 1 - - log.info( - f"extracting links done, sizeof(linksDict)={sys.getsizeof(linksDict)}", - ) - log.info(f"{wordCount = }") - self._linksDict = linksDict - self._wordCount = wordCount - self._mdx = MDX(self._filename, self._encoding, self._substyle) - - def fixDefi(self, defi: str) -> str: - defi = self._re_internal_link.sub(r"href=\1bword://", defi) - defi = defi.replace(' src="file://', ' src=".') - - if self._audio: - # \5 is the possible elements between and - # but anything between and is completely - # ignored by Aaard2 Web and browser - # and there is no point adding it after - # which makes it shown after audio controls - - # GoldenDict acts completely different, so must use - # audio_goldendict=True option in StarDict writer instead. 
- - defi = self._re_audio_link.sub( - r'', - defi, - ) - - return defi - - def __iter__(self) -> Iterator[EntryType]: - if self._mdx is None: - log.error("trying to iterate on a closed MDX file") - return - - glos = self._glos - linksDict = self._linksDict - for b_word, b_defi in self._mdx.items(): - word = b_word.decode("utf-8") - defi = b_defi.decode("utf-8").strip() - if defi.startswith("@@@LINK="): - continue - defi = self.fixDefi(defi) - words = word - altsStr = linksDict.get(word, "") - if altsStr: - words = [word] + altsStr.split("\n") - yield glos.newEntry(words, defi) - - self._mdx = None - del linksDict - self._linksDict = {} - gc.collect() - - if self._same_dir_data_files: - dirPath = dirname(self._filename) - for fname in os.listdir(dirPath): - ext = splitext(fname)[1].lower() - if ext in {".mdx", ".mdd"}: - continue - fpath = join(dirPath, fname) - if not isfile(fpath): - continue - with open(fpath, mode="rb") as _file: - b_data = _file.read() - yield glos.newDataEntry(fname, b_data) - - for mdd in self._mdd: - try: - for b_fname, b_data in mdd.items(): - fname = toStr(b_fname) - fname = fname.replace("\\", os.sep).lstrip(os.sep) - yield glos.newDataEntry(fname, b_data) - except Exception: # noqa: PERF203 - log.exception(f"Error reading {mdd.filename}") - self._mdd = [] - - def __len__(self) -> int: - return self._wordCount + self._dataEntryCount - - def close(self) -> None: - self.clear() diff --git a/pyglossary/plugins/octopus_mdict_new/reader.py b/pyglossary/plugins/octopus_mdict_new/reader.py new file mode 100644 index 000000000..f154200dc --- /dev/null +++ b/pyglossary/plugins/octopus_mdict_new/reader.py @@ -0,0 +1,221 @@ +# -*- coding: utf-8 -*- +# Read Octopus MDict dictionary format, mdx(dictionary)/mdd(data) +# +# Copyright © 2013 Xiaoqiang Wang +# Copyright © 2013-2021 Saeed Rasooli +# +# This program is a free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# You can get a copy of GNU General Public License along this program +# But you can always get it from http://www.gnu.org/licenses/gpl.txt +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +from __future__ import annotations + +import gc +import os +import re +import sys +from os.path import dirname, extsep, isfile, join, splitext +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.plugin_lib.readmdict import MDD, MDX + + +from pyglossary.core import log +from pyglossary.text_utils import toStr + + +class Reader: + _encoding: str = "" + _substyle: bool = True + _same_dir_data_files: bool = False + _audio: bool = False + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self.clear() + self._re_internal_link = re.compile("href=([\"'])(entry://|[dx]:)") + self._re_audio_link = re.compile( + ']*? 
)?href="sound://([^<>"]+)"( .*?)?>(.*?)', + ) + + def clear(self) -> None: + self._filename = "" + self._mdx: MDX | None = None + self._mdd: list[MDD] = [] + self._wordCount = 0 + self._dataEntryCount = 0 + + # dict of mainWord -> newline-separated alternatives + self._linksDict: dict[str, str] = {} + + def open(self, filename: str) -> None: + from pyglossary.plugin_lib.readmdict import MDD, MDX + + self._filename = filename + self._mdx = MDX(filename, self._encoding, self._substyle) + + """ + multiple MDD files are supported with this naming schema: + FILE.mdx + FILE.mdd + FILE.1.mdd + FILE.2.mdd + FILE.3.mdd + """ + + filenameNoExt, _ext = splitext(self._filename) + mddBase = filenameNoExt + extsep + for fname in (f"{mddBase}mdd", f"{mddBase}1.mdd"): + if isfile(fname): + self._mdd.append(MDD(fname)) + mddN = 2 + while isfile(f"{mddBase}{mddN}.mdd"): + self._mdd.append(MDD(f"{mddBase}{mddN}.mdd")) + mddN += 1 + + dataEntryCount = 0 + for mdd in self._mdd: + dataEntryCount += len(mdd) + self._dataEntryCount = dataEntryCount + log.info(f"Found {len(self._mdd)} mdd files with {dataEntryCount} entries") + + # from pprint import pformat + # log.debug("mdx.header = " + pformat(self._mdx.header)) + # for key, value in self._mdx.header.items(): + # key = key.lower() + # self._glos.setInfo(key, value) + try: + title = toStr(self._mdx.header[b"Title"]) + except KeyError: + pass + else: + title = title.strip() + if title == "Title (No HTML code allowed)": + # TODO: how to avoid this? + title = "" + if title: + self._glos.setInfo("name", title) + desc = toStr(self._mdx.header.get(b"Description", "")) + if desc: + self._glos.setInfo("description", desc) + + self.loadLinks() + + def loadLinks(self) -> None: + from pyglossary.plugin_lib.readmdict import MDX + + mdx = self._mdx + if mdx is None: + raise ValueError("mdx is None") + + log.info("extracting links...") + linksDict: dict[str, str] = {} + word = "" + wordCount = 0 + for b_word, b_defi in mdx.items(): + word = b_word.decode("utf-8") + defi = b_defi.decode("utf-8").strip() + if defi.startswith("@@@LINK="): + if not word: + log.warning(f"unexpected defi: {defi}") + continue + mainWord = defi[8:] + if mainWord in linksDict: + linksDict[mainWord] += "\n" + word + else: + linksDict[mainWord] = word + continue + wordCount += 1 + + log.info( + f"extracting links done, sizeof(linksDict)={sys.getsizeof(linksDict)}", + ) + log.info(f"{wordCount = }") + self._linksDict = linksDict + self._wordCount = wordCount + self._mdx = MDX(self._filename, self._encoding, self._substyle) + + def fixDefi(self, defi: str) -> str: + defi = self._re_internal_link.sub(r"href=\1bword://", defi) + defi = defi.replace(' src="file://', ' src=".') + + if self._audio: + # \5 is the possible elements between and + # but anything between and is completely + # ignored by Aaard2 Web and browser + # and there is no point adding it after + # which makes it shown after audio controls + + # GoldenDict acts completely different, so must use + # audio_goldendict=True option in StarDict writer instead. 
+ + defi = self._re_audio_link.sub( + r'', + defi, + ) + + return defi + + def __iter__(self) -> Iterator[EntryType]: + if self._mdx is None: + log.error("trying to iterate on a closed MDX file") + return + + glos = self._glos + linksDict = self._linksDict + for b_word, b_defi in self._mdx.items(): + word = b_word.decode("utf-8") + defi = b_defi.decode("utf-8").strip() + if defi.startswith("@@@LINK="): + continue + defi = self.fixDefi(defi) + words = word + altsStr = linksDict.get(word, "") + if altsStr: + words = [word] + altsStr.split("\n") + yield glos.newEntry(words, defi) + + self._mdx = None + del linksDict + self._linksDict = {} + gc.collect() + + if self._same_dir_data_files: + dirPath = dirname(self._filename) + for fname in os.listdir(dirPath): + ext = splitext(fname)[1].lower() + if ext in {".mdx", ".mdd"}: + continue + fpath = join(dirPath, fname) + if not isfile(fpath): + continue + with open(fpath, mode="rb") as _file: + b_data = _file.read() + yield glos.newDataEntry(fname, b_data) + + for mdd in self._mdd: + try: + for b_fname, b_data in mdd.items(): + fname = toStr(b_fname) + fname = fname.replace("\\", os.sep).lstrip(os.sep) + yield glos.newDataEntry(fname, b_data) + except Exception: # noqa: PERF203 + log.exception(f"Error reading {mdd.filename}") + self._mdd = [] + + def __len__(self) -> int: + return self._wordCount + self._dataEntryCount + + def close(self) -> None: + self.clear() diff --git a/pyglossary/plugins/sql/__init__.py b/pyglossary/plugins/sql/__init__.py index fce4cfb56..c0629c979 100644 --- a/pyglossary/plugins/sql/__init__.py +++ b/pyglossary/plugins/sql/__init__.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from typing import TYPE_CHECKING - from pyglossary.option import ( BoolOption, EncodingOption, @@ -11,11 +9,7 @@ Option, ) -if TYPE_CHECKING: - import io - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType +from .writer import Writer __all__ = [ "Writer", @@ -49,133 +43,3 @@ "newline": NewlineOption(), "transaction": BoolOption(comment="Use TRANSACTION"), } - - -class Writer: - _encoding: str = "utf-8" - _info_keys: list | None = None - _add_extra_info: bool = True - _newline: str = "
                      " - _transaction: bool = False - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: io.IOBase | None = None - - def finish(self) -> None: - self._filename = "" - if self._file: - self._file.close() - self._file = None - - def open(self, filename: str) -> None: - self._filename = filename - self._file = open(filename, "w", encoding=self._encoding) - self._writeInfo() - - def _writeInfo(self) -> None: - fileObj = self._file - if fileObj is None: - raise ValueError("fileObj is None") - newline = self._newline - info_keys = self._getInfoKeys() - infoDefLine = "CREATE TABLE dbinfo (" - infoValues: list[str] = [] - glos = self._glos - - for key in info_keys: - value = glos.getInfo(key) - value = ( - value.replace("'", "''") - .replace("\x00", "") - .replace("\r", "") - .replace("\n", newline) - ) - infoValues.append(f"'{value}'") - infoDefLine += f"{key} char({len(value)}), " - - infoDefLine = infoDefLine[:-2] + ");" - fileObj.write(infoDefLine + "\n") - - if self._add_extra_info: - fileObj.write( - "CREATE TABLE dbinfo_extra (" - "'id' INTEGER PRIMARY KEY NOT NULL, " - "'name' TEXT UNIQUE, 'value' TEXT);\n", - ) - - fileObj.write( - "CREATE TABLE word ('id' INTEGER PRIMARY KEY NOT NULL, " - "'w' TEXT, 'm' TEXT);\n", - ) - fileObj.write( - "CREATE TABLE alt ('id' INTEGER NOT NULL, 'w' TEXT);\n", - ) - - if self._transaction: - fileObj.write("BEGIN TRANSACTION;\n") - fileObj.write(f"INSERT INTO dbinfo VALUES({','.join(infoValues)});\n") - - if self._add_extra_info: - extraInfo = glos.getExtraInfos(info_keys) - for index, (key, value) in enumerate(extraInfo.items()): - key2 = key.replace("'", "''") - value2 = value.replace("'", "''") - fileObj.write( - f"INSERT INTO dbinfo_extra VALUES({index + 1}, " - f"'{key2}', '{value2}');\n", - ) - - def _getInfoKeys(self) -> list[str]: - info_keys = self._info_keys - if info_keys: - return info_keys - return [ - "dbname", - "author", - "version", - "direction", - "origLang", - "destLang", - "license", - "category", - "description", - ] - - def write(self) -> Generator[None, EntryType, None]: - newline = self._newline - - fileObj = self._file - if fileObj is None: - raise ValueError("fileObj is None") - - def fixStr(word: str) -> str: - return word.replace("'", "''").replace("\r", "").replace("\n", newline) - - id_ = 1 - while True: - entry = yield - if entry is None: - break - if entry.isData(): - # FIXME - continue - words = entry.l_word - word = fixStr(words[0]) - defi = fixStr(entry.defi) - fileObj.write( - f"INSERT INTO word VALUES({id_}, '{word}', '{defi}');\n", - ) - for alt in words[1:]: - fileObj.write( - f"INSERT INTO alt VALUES({id_}, '{fixStr(alt)}');\n", - ) - id_ += 1 - - if self._transaction: - fileObj.write("END TRANSACTION;\n") - - fileObj.write("CREATE INDEX ix_word_w ON word(w COLLATE NOCASE);\n") - fileObj.write("CREATE INDEX ix_alt_id ON alt(id COLLATE NOCASE);\n") - fileObj.write("CREATE INDEX ix_alt_w ON alt(w COLLATE NOCASE);\n") diff --git a/pyglossary/plugins/sql/writer.py b/pyglossary/plugins/sql/writer.py new file mode 100644 index 000000000..64350fc5a --- /dev/null +++ b/pyglossary/plugins/sql/writer.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import io + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + _encoding: str = "utf-8" + _info_keys: list | None = None + _add_extra_info: bool = 
True + _newline: str = "
                      " + _transaction: bool = False + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: io.IOBase | None = None + + def finish(self) -> None: + self._filename = "" + if self._file: + self._file.close() + self._file = None + + def open(self, filename: str) -> None: + self._filename = filename + self._file = open(filename, "w", encoding=self._encoding) + self._writeInfo() + + def _writeInfo(self) -> None: + fileObj = self._file + if fileObj is None: + raise ValueError("fileObj is None") + newline = self._newline + info_keys = self._getInfoKeys() + infoDefLine = "CREATE TABLE dbinfo (" + infoValues: list[str] = [] + glos = self._glos + + for key in info_keys: + value = glos.getInfo(key) + value = ( + value.replace("'", "''") + .replace("\x00", "") + .replace("\r", "") + .replace("\n", newline) + ) + infoValues.append(f"'{value}'") + infoDefLine += f"{key} char({len(value)}), " + + infoDefLine = infoDefLine[:-2] + ");" + fileObj.write(infoDefLine + "\n") + + if self._add_extra_info: + fileObj.write( + "CREATE TABLE dbinfo_extra (" + "'id' INTEGER PRIMARY KEY NOT NULL, " + "'name' TEXT UNIQUE, 'value' TEXT);\n", + ) + + fileObj.write( + "CREATE TABLE word ('id' INTEGER PRIMARY KEY NOT NULL, " + "'w' TEXT, 'm' TEXT);\n", + ) + fileObj.write( + "CREATE TABLE alt ('id' INTEGER NOT NULL, 'w' TEXT);\n", + ) + + if self._transaction: + fileObj.write("BEGIN TRANSACTION;\n") + fileObj.write(f"INSERT INTO dbinfo VALUES({','.join(infoValues)});\n") + + if self._add_extra_info: + extraInfo = glos.getExtraInfos(info_keys) + for index, (key, value) in enumerate(extraInfo.items()): + key2 = key.replace("'", "''") + value2 = value.replace("'", "''") + fileObj.write( + f"INSERT INTO dbinfo_extra VALUES({index + 1}, " + f"'{key2}', '{value2}');\n", + ) + + def _getInfoKeys(self) -> list[str]: + info_keys = self._info_keys + if info_keys: + return info_keys + return [ + "dbname", + "author", + "version", + "direction", + "origLang", + "destLang", + "license", + "category", + "description", + ] + + def write(self) -> Generator[None, EntryType, None]: + newline = self._newline + + fileObj = self._file + if fileObj is None: + raise ValueError("fileObj is None") + + def fixStr(word: str) -> str: + return word.replace("'", "''").replace("\r", "").replace("\n", newline) + + id_ = 1 + while True: + entry = yield + if entry is None: + break + if entry.isData(): + # FIXME + continue + words = entry.l_word + word = fixStr(words[0]) + defi = fixStr(entry.defi) + fileObj.write( + f"INSERT INTO word VALUES({id_}, '{word}', '{defi}');\n", + ) + for alt in words[1:]: + fileObj.write( + f"INSERT INTO alt VALUES({id_}, '{fixStr(alt)}');\n", + ) + id_ += 1 + + if self._transaction: + fileObj.write("END TRANSACTION;\n") + + fileObj.write("CREATE INDEX ix_word_w ON word(w COLLATE NOCASE);\n") + fileObj.write("CREATE INDEX ix_alt_id ON alt(id COLLATE NOCASE);\n") + fileObj.write("CREATE INDEX ix_alt_w ON alt(w COLLATE NOCASE);\n") diff --git a/pyglossary/plugins/stardict_merge_syns/__init__.py b/pyglossary/plugins/stardict_merge_syns/__init__.py index b13cb423f..d1ef62fc7 100644 --- a/pyglossary/plugins/stardict_merge_syns/__init__.py +++ b/pyglossary/plugins/stardict_merge_syns/__init__.py @@ -2,10 +2,6 @@ from __future__ import annotations import os -from time import perf_counter as now -from typing import ( - TYPE_CHECKING, -) from pyglossary.flags import ALWAYS, DEFAULT_YES from pyglossary.option import ( @@ -13,17 +9,8 @@ Option, StrOption, ) -from 
pyglossary.plugins.stardict import Writer as StdWriter - -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType - -from pyglossary.core import log -from pyglossary.glossary_utils import Error -from pyglossary.text_utils import uint32ToBytes +from .writer import Writer __all__ = [ "Writer", @@ -97,121 +84,3 @@ if os.getenv("PYGLOSSARY_STARDICT_NO_FORCE_SORT") == "1": sortOnWrite = DEFAULT_YES - - -class Writer(StdWriter): - dictzipSynFile = False - - def fixDefi(self, defi: str, defiFormat: str) -> bytes: # noqa: ARG002, PLR6301 - return defi.encode("utf-8") - - def writeCompact( - self, - defiFormat: str, - ) -> Generator[None, EntryType, None]: - """ - Build StarDict dictionary with sametypesequence option specified. - Every item definition consists of a single article. - All articles have the same format, specified in defiFormat parameter. - - defiFormat - format of article definition: h - html, m - plain text - """ - log.debug(f"writeCompact: {defiFormat=}") - - idxBlockList = self.newIdxList() - altIndexList = self.newSynList() - - dictFile = open(self._filename + ".dict", "wb") - - t0 = now() - - dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() - - dictMark, entryIndex = 0, -1 - while True: - entry = yield - if entry is None: - break - if entry.isData(): - entry.save(self._resDir) - continue - entryIndex += 1 - - b_dictBlock = self.fixDefi(entry.defi, defiFormat) - dictFile.write(b_dictBlock) - - b_idxBlock = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) - for b_word in entry.lb_word: - idxBlockList.append((b_word, b_idxBlock)) - - dictMark += len(b_dictBlock) - - if dictMark > dictMarkMax: - raise Error( - f"StarDict: {dictMark = } is too big, set option large_file=true", - ) - - dictFile.close() - log.info(f"Writing dict file took {now() - t0:.2f} seconds") - - self.writeIdxFile(idxBlockList) - - self.writeIfoFile( - len(idxBlockList), - len(altIndexList), - ) - - def writeGeneral(self) -> Generator[None, EntryType, None]: - """ - Build StarDict dictionary in general case. - Every item definition may consist of an arbitrary number of articles. - sametypesequence option is not used. 
- """ - log.debug("writeGeneral") - idxBlockList = self.newIdxList() - altIndexList = self.newSynList() - - dictFile = open(self._filename + ".dict", "wb") - - t0 = now() - - dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() - - dictMark, entryIndex = 0, -1 - while True: - entry = yield - if entry is None: - break - if entry.isData(): - entry.save(self._resDir) - continue - entryIndex += 1 - - defiFormat = entry.detectDefiFormat("m") # call no more than once - - b_defi = self.fixDefi(entry.defi, defiFormat) - b_dictBlock = defiFormat.encode("ascii") + b_defi + b"\x00" - dictFile.write(b_dictBlock) - - b_idxBlock = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) - for b_word in entry.lb_word: - idxBlockList.append((b_word, b_idxBlock)) - - dictMark += len(b_dictBlock) - - if dictMark > dictMarkMax: - raise Error( - f"StarDict: {dictMark = } is too big, set option large_file=true", - ) - - dictFile.close() - log.info(f"Writing dict file took {now() - t0:.2f} seconds") - - self.writeIdxFile(idxBlockList) - - self.writeIfoFile( - len(idxBlockList), - len(altIndexList), - ) - - # TODO: override getDescription to indicate merge_syns diff --git a/pyglossary/plugins/stardict_merge_syns/writer.py b/pyglossary/plugins/stardict_merge_syns/writer.py new file mode 100644 index 000000000..ba0349d04 --- /dev/null +++ b/pyglossary/plugins/stardict_merge_syns/writer.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from time import perf_counter as now +from typing import ( + TYPE_CHECKING, +) + +from pyglossary.plugins.stardict import Writer as StdWriter + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType + + +from pyglossary.core import log +from pyglossary.glossary_utils import Error +from pyglossary.text_utils import uint32ToBytes + + +class Writer(StdWriter): + dictzipSynFile = False + + def fixDefi(self, defi: str, defiFormat: str) -> bytes: # noqa: ARG002, PLR6301 + return defi.encode("utf-8") + + def writeCompact( + self, + defiFormat: str, + ) -> Generator[None, EntryType, None]: + """ + Build StarDict dictionary with sametypesequence option specified. + Every item definition consists of a single article. + All articles have the same format, specified in defiFormat parameter. + + defiFormat - format of article definition: h - html, m - plain text + """ + log.debug(f"writeCompact: {defiFormat=}") + + idxBlockList = self.newIdxList() + altIndexList = self.newSynList() + + dictFile = open(self._filename + ".dict", "wb") + + t0 = now() + + dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() + + dictMark, entryIndex = 0, -1 + while True: + entry = yield + if entry is None: + break + if entry.isData(): + entry.save(self._resDir) + continue + entryIndex += 1 + + b_dictBlock = self.fixDefi(entry.defi, defiFormat) + dictFile.write(b_dictBlock) + + b_idxBlock = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) + for b_word in entry.lb_word: + idxBlockList.append((b_word, b_idxBlock)) + + dictMark += len(b_dictBlock) + + if dictMark > dictMarkMax: + raise Error( + f"StarDict: {dictMark = } is too big, set option large_file=true", + ) + + dictFile.close() + log.info(f"Writing dict file took {now() - t0:.2f} seconds") + + self.writeIdxFile(idxBlockList) + + self.writeIfoFile( + len(idxBlockList), + len(altIndexList), + ) + + def writeGeneral(self) -> Generator[None, EntryType, None]: + """ + Build StarDict dictionary in general case. 
+ Every item definition may consist of an arbitrary number of articles. + sametypesequence option is not used. + """ + log.debug("writeGeneral") + idxBlockList = self.newIdxList() + altIndexList = self.newSynList() + + dictFile = open(self._filename + ".dict", "wb") + + t0 = now() + + dictMarkToBytes, dictMarkMax = self.dictMarkToBytesFunc() + + dictMark, entryIndex = 0, -1 + while True: + entry = yield + if entry is None: + break + if entry.isData(): + entry.save(self._resDir) + continue + entryIndex += 1 + + defiFormat = entry.detectDefiFormat("m") # call no more than once + + b_defi = self.fixDefi(entry.defi, defiFormat) + b_dictBlock = defiFormat.encode("ascii") + b_defi + b"\x00" + dictFile.write(b_dictBlock) + + b_idxBlock = dictMarkToBytes(dictMark) + uint32ToBytes(len(b_dictBlock)) + for b_word in entry.lb_word: + idxBlockList.append((b_word, b_idxBlock)) + + dictMark += len(b_dictBlock) + + if dictMark > dictMarkMax: + raise Error( + f"StarDict: {dictMark = } is too big, set option large_file=true", + ) + + dictFile.close() + log.info(f"Writing dict file took {now() - t0:.2f} seconds") + + self.writeIdxFile(idxBlockList) + + self.writeIfoFile( + len(idxBlockList), + len(altIndexList), + ) + + # TODO: override getDescription to indicate merge_syns diff --git a/pyglossary/plugins/stardict_textual/__init__.py b/pyglossary/plugins/stardict_textual/__init__.py index a54d04266..80dc78d69 100644 --- a/pyglossary/plugins/stardict_textual/__init__.py +++ b/pyglossary/plugins/stardict_textual/__init__.py @@ -1,34 +1,15 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import os -from os.path import dirname, isdir, join -from typing import TYPE_CHECKING, cast - -if TYPE_CHECKING: - import io - from collections.abc import Generator, Iterator - - from lxml import builder - - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.lxml_types import Element - from pyglossary.xdxf.transform import XdxfTransformer - - -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import exc_note, log, pip -from pyglossary.html_utils import unescape_unicode -from pyglossary.io_utils import nullBinaryIO from pyglossary.option import ( BoolOption, EncodingOption, Option, ) +from .reader import Reader +from .writer import Writer + __all__ = [ "Reader", "Writer", @@ -66,337 +47,3 @@ comment="Convert XDXF entries to HTML", ), } - - -class Reader: - _encoding: str = "utf-8" - _xdxf_to_html: bool = True - - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: io.IOBase = nullBinaryIO - self._fileSize = 0 - self._xdxfTr: XdxfTransformer | None = None - - def xdxf_setup(self) -> XdxfTransformer: - from pyglossary.xdxf.transform import XdxfTransformer - - self._xdxfTr = tr = XdxfTransformer(encoding="utf-8") - return tr - - def xdxf_transform(self, text: str) -> str: - tr = self._xdxfTr - if tr is None: - tr = self.xdxf_setup() - return tr.transformByInnerString(text) - - def __len__(self) -> int: - return 0 - - def close(self) -> None: - self._file.close() - self._file = nullBinaryIO - self._filename = "" - self._fileSize = 0 - - def open(self, filename: str) -> None: - try: - from lxml import etree as ET - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install lxml` to install") - raise - - self._filename = filename - cfile = compressionOpen(filename, mode="rb") - - if cfile.seekable(): - 
cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - # self._glos.setInfo("input_file_size", f"{self._fileSize}") - else: - log.warning("StarDict Textual File Reader: file is not seekable") - - context = ET.iterparse( # type: ignore # noqa: PGH003 - cfile, - events=("end",), - tag="info", - ) - for _, elem in context: - self.setMetadata(elem) # type: ignore - break - - cfile.close() - - def setGlosInfo(self, key: str, value: str) -> None: - if value is None: - return - self._glos.setInfo(key, unescape_unicode(value)) - - def setMetadata(self, header: Element) -> None: - if (elem := header.find("./bookname")) is not None and elem.text: - self.setGlosInfo("name", elem.text) - - if (elem := header.find("./author")) is not None and elem.text: - self.setGlosInfo("author", elem.text) - - if (elem := header.find("./email")) is not None and elem.text: - self.setGlosInfo("email", elem.text) - - if (elem := header.find("./website")) is not None and elem.text: - self.setGlosInfo("website", elem.text) - - if (elem := header.find("./description")) is not None and elem.text: - self.setGlosInfo("description", elem.text) - - if (elem := header.find("./bookname")) is not None and elem.text: - self.setGlosInfo("name", elem.text) - - if (elem := header.find("./bookname")) is not None and elem.text: - self.setGlosInfo("name", elem.text) - - if (elem := header.find("./date")) is not None and elem.text: - self.setGlosInfo("creationTime", elem.text) - - # if (elem := header.find("./dicttype")) is not None and elem.text: - # self.setGlosInfo("dicttype", elem.text) - - def renderDefiList( - self, - defisWithFormat: list[tuple[str, str]], - ) -> tuple[str, str]: - if not defisWithFormat: - return "", "" - if len(defisWithFormat) == 1: - return defisWithFormat[0] - - defiFormatSet: set[str] = set() - defiFormatSet.update(_type for _, _type in defisWithFormat) - - if len(defiFormatSet) == 1: - format_ = defiFormatSet.pop() - if format_ == "h": - return "\n
                      ".join([defi for defi, _ in defisWithFormat]), format_ - return "\n".join([defi for defi, _ in defisWithFormat]), format_ - - # convert plaintext or xdxf to html - defis: list[str] = [] - for defi_, format_ in defisWithFormat: - if format_ == "m": - defis.append("
                      " + defi_.replace("\n", "
                      ") + "
                      ") - elif format_ == "x": - defis.append(self.xdxf_transform(defi_)) - else: - defis.append(defi_) - return "\n
                      \n".join(defis), "h" - - def __iter__(self) -> Iterator[EntryType]: - from lxml import etree as ET - - glos = self._glos - fileSize = self._fileSize - self._file = file = compressionOpen(self._filename, mode="rb") - context = ET.iterparse( # type: ignore # noqa: PGH003 - self._file, - events=("end",), - tag="article", - ) - for _, _elem in context: - elem = cast("Element", _elem) - words: list[str] = [] - defisWithFormat: list[tuple[str, str]] = [] - for child in elem.iterchildren(): - if not child.text: - continue - if child.tag in {"key", "synonym"}: - words.append(child.text) - elif child.tag == "definition": - type_ = child.attrib.get("type", "") - if type_: - new_type = { - "m": "m", - "t": "m", - "y": "m", - "g": "h", - "h": "h", - "x": "x", - }.get(type_, "") - if not new_type: - log.warning(f"unsupported definition type {type_}") - type_ = new_type - if not type_: - type_ = "m" - defi_ = child.text.strip() - if type_ == "x" and self._xdxf_to_html: - defi_ = self.xdxf_transform(defi_) - type_ = "h" - defisWithFormat.append((defi_, type_)) - # TODO: child.tag == "definition-r" - else: - log.warning(f"unknown tag {child.tag}") - - defi, defiFormat = self.renderDefiList(defisWithFormat) - - yield glos.newEntry( - words, - defi, - defiFormat=defiFormat, - byteProgress=(file.tell(), fileSize), - ) - - # clean up preceding siblings to save memory - # this can reduce memory usage from >300 MB to ~25 MB - while elem.getprevious() is not None: - parent = elem.getparent() - if parent is None: - break - del parent[0] - - -class Writer: - _encoding: str = "utf-8" - - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._resDir = "" - - def open( - self, - filename: str, - ) -> None: - self._filename = filename - self._resDir = join(dirname(self._filename), "res") - self._file = compressionOpen( - self._filename, - mode="w", - encoding=self._encoding, - ) - - def finish(self) -> None: - self._file.close() - - def writeInfo( - self, - maker: builder.ElementMaker, - pretty: bool, - ) -> None: - from lxml import etree as ET - - glos = self._glos - - desc = glos.getInfo("description") - copyright_ = glos.getInfo("copyright") - if copyright_: - desc = f"{copyright_}\n{desc}" - publisher = glos.getInfo("publisher") - if publisher: - desc = f"Publisher: {publisher}\n{desc}" - - info = maker.info( - maker.version("3.0.0"), - maker.bookname(glos.getInfo("name")), - maker.author(glos.getInfo("author")), - maker.email(glos.getInfo("email")), - maker.website(glos.getInfo("website")), - maker.description(desc), - maker.date(glos.getInfo("creationTime")), - maker.dicttype(""), - ) - file = self._file - file.write( - cast( - "bytes", - ET.tostring( - info, - encoding=self._encoding, - pretty_print=pretty, - ), - ).decode(self._encoding) - + "\n", - ) - - def writeDataEntry( - self, - maker: builder.ElementMaker, # noqa: ARG002 - entry: EntryType, - ) -> None: - entry.save(self._resDir) - # TODO: create article tag with "definition-r" in it? - # or just save the file to res/ directory? or both? 
- # article = maker.article( - # maker.key(entry.s_word), - # maker.definition_r( - # ET.CDATA(entry.defi), - # **{"type": ext}) - # ) - # ) - - def write(self) -> Generator[None, EntryType, None]: - from lxml import builder - from lxml import etree as ET - - file = self._file - encoding = self._encoding - maker = builder.ElementMaker() - - file.write( - """ - -""", - ) - - self.writeInfo(maker, pretty=True) - - if not isdir(self._resDir): - os.mkdir(self._resDir) - - pretty = True - while True: - entry = yield - if entry is None: - break - if entry.isData(): - self.writeDataEntry(maker, entry) - continue - entry.detectDefiFormat() - article = maker.article( - maker.key(entry.l_word[0]), - ) - for alt in entry.l_word[1:]: - article.append(maker.synonym(alt)) - article.append( - maker.definition( - ET.CDATA(entry.defi), - type=entry.defiFormat, - ), - ) - ET.indent(article, space="") - articleStr = cast( - "bytes", - ET.tostring( - article, - pretty_print=pretty, - encoding=encoding, - ), - ).decode(encoding) - # for some reason, "´k" becomes " ́k" (for example) # noqa: RUF003 - # stardict-text2bin tool also does this. - # https://en.wiktionary.org/wiki/%CB%88#Translingual - self._file.write(articleStr + "\n") - - file.write("") - - if not os.listdir(self._resDir): - os.rmdir(self._resDir) diff --git a/pyglossary/plugins/stardict_textual/reader.py b/pyglossary/plugins/stardict_textual/reader.py new file mode 100644 index 000000000..91fea26c8 --- /dev/null +++ b/pyglossary/plugins/stardict_textual/reader.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import io + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.lxml_types import Element + from pyglossary.xdxf.transform import XdxfTransformer + + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import exc_note, log, pip +from pyglossary.html_utils import unescape_unicode +from pyglossary.io_utils import nullBinaryIO + + +class Reader: + _encoding: str = "utf-8" + _xdxf_to_html: bool = True + + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: io.IOBase = nullBinaryIO + self._fileSize = 0 + self._xdxfTr: XdxfTransformer | None = None + + def xdxf_setup(self) -> XdxfTransformer: + from pyglossary.xdxf.transform import XdxfTransformer + + self._xdxfTr = tr = XdxfTransformer(encoding="utf-8") + return tr + + def xdxf_transform(self, text: str) -> str: + tr = self._xdxfTr + if tr is None: + tr = self.xdxf_setup() + return tr.transformByInnerString(text) + + def __len__(self) -> int: + return 0 + + def close(self) -> None: + self._file.close() + self._file = nullBinaryIO + self._filename = "" + self._fileSize = 0 + + def open(self, filename: str) -> None: + try: + from lxml import etree as ET + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install lxml` to install") + raise + + self._filename = filename + cfile = compressionOpen(filename, mode="rb") + + if cfile.seekable(): + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + # self._glos.setInfo("input_file_size", f"{self._fileSize}") + else: + log.warning("StarDict Textual File Reader: file is not seekable") + + context = ET.iterparse( # type: ignore # noqa: PGH003 + cfile, + events=("end",), + tag="info", + ) + for _, elem 
in context: + self.setMetadata(elem) # type: ignore + break + + cfile.close() + + def setGlosInfo(self, key: str, value: str) -> None: + if value is None: + return + self._glos.setInfo(key, unescape_unicode(value)) + + def setMetadata(self, header: Element) -> None: + if (elem := header.find("./bookname")) is not None and elem.text: + self.setGlosInfo("name", elem.text) + + if (elem := header.find("./author")) is not None and elem.text: + self.setGlosInfo("author", elem.text) + + if (elem := header.find("./email")) is not None and elem.text: + self.setGlosInfo("email", elem.text) + + if (elem := header.find("./website")) is not None and elem.text: + self.setGlosInfo("website", elem.text) + + if (elem := header.find("./description")) is not None and elem.text: + self.setGlosInfo("description", elem.text) + + if (elem := header.find("./bookname")) is not None and elem.text: + self.setGlosInfo("name", elem.text) + + if (elem := header.find("./bookname")) is not None and elem.text: + self.setGlosInfo("name", elem.text) + + if (elem := header.find("./date")) is not None and elem.text: + self.setGlosInfo("creationTime", elem.text) + + # if (elem := header.find("./dicttype")) is not None and elem.text: + # self.setGlosInfo("dicttype", elem.text) + + def renderDefiList( + self, + defisWithFormat: list[tuple[str, str]], + ) -> tuple[str, str]: + if not defisWithFormat: + return "", "" + if len(defisWithFormat) == 1: + return defisWithFormat[0] + + defiFormatSet: set[str] = set() + defiFormatSet.update(_type for _, _type in defisWithFormat) + + if len(defiFormatSet) == 1: + format_ = defiFormatSet.pop() + if format_ == "h": + return "\n
                      ".join([defi for defi, _ in defisWithFormat]), format_ + return "\n".join([defi for defi, _ in defisWithFormat]), format_ + + # convert plaintext or xdxf to html + defis: list[str] = [] + for defi_, format_ in defisWithFormat: + if format_ == "m": + defis.append("
                      " + defi_.replace("\n", "
                      ") + "
                      ") + elif format_ == "x": + defis.append(self.xdxf_transform(defi_)) + else: + defis.append(defi_) + return "\n
                      \n".join(defis), "h" + + def __iter__(self) -> Iterator[EntryType]: + from lxml import etree as ET + + glos = self._glos + fileSize = self._fileSize + self._file = file = compressionOpen(self._filename, mode="rb") + context = ET.iterparse( # type: ignore # noqa: PGH003 + self._file, + events=("end",), + tag="article", + ) + for _, _elem in context: + elem = cast("Element", _elem) + words: list[str] = [] + defisWithFormat: list[tuple[str, str]] = [] + for child in elem.iterchildren(): + if not child.text: + continue + if child.tag in {"key", "synonym"}: + words.append(child.text) + elif child.tag == "definition": + type_ = child.attrib.get("type", "") + if type_: + new_type = { + "m": "m", + "t": "m", + "y": "m", + "g": "h", + "h": "h", + "x": "x", + }.get(type_, "") + if not new_type: + log.warning(f"unsupported definition type {type_}") + type_ = new_type + if not type_: + type_ = "m" + defi_ = child.text.strip() + if type_ == "x" and self._xdxf_to_html: + defi_ = self.xdxf_transform(defi_) + type_ = "h" + defisWithFormat.append((defi_, type_)) + # TODO: child.tag == "definition-r" + else: + log.warning(f"unknown tag {child.tag}") + + defi, defiFormat = self.renderDefiList(defisWithFormat) + + yield glos.newEntry( + words, + defi, + defiFormat=defiFormat, + byteProgress=(file.tell(), fileSize), + ) + + # clean up preceding siblings to save memory + # this can reduce memory usage from >300 MB to ~25 MB + while elem.getprevious() is not None: + parent = elem.getparent() + if parent is None: + break + del parent[0] diff --git a/pyglossary/plugins/stardict_textual/writer.py b/pyglossary/plugins/stardict_textual/writer.py new file mode 100644 index 000000000..c7681d839 --- /dev/null +++ b/pyglossary/plugins/stardict_textual/writer.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import os +from os.path import dirname, isdir, join +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + from collections.abc import Generator + + from lxml import builder + + from pyglossary.glossary_types import EntryType, GlossaryType + + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) + + +class Writer: + _encoding: str = "utf-8" + + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._resDir = "" + + def open( + self, + filename: str, + ) -> None: + self._filename = filename + self._resDir = join(dirname(self._filename), "res") + self._file = compressionOpen( + self._filename, + mode="w", + encoding=self._encoding, + ) + + def finish(self) -> None: + self._file.close() + + def writeInfo( + self, + maker: builder.ElementMaker, + pretty: bool, + ) -> None: + from lxml import etree as ET + + glos = self._glos + + desc = glos.getInfo("description") + copyright_ = glos.getInfo("copyright") + if copyright_: + desc = f"{copyright_}\n{desc}" + publisher = glos.getInfo("publisher") + if publisher: + desc = f"Publisher: {publisher}\n{desc}" + + info = maker.info( + maker.version("3.0.0"), + maker.bookname(glos.getInfo("name")), + maker.author(glos.getInfo("author")), + maker.email(glos.getInfo("email")), + maker.website(glos.getInfo("website")), + maker.description(desc), + maker.date(glos.getInfo("creationTime")), + maker.dicttype(""), + ) + file = self._file + file.write( + cast( + "bytes", + ET.tostring( + info, + encoding=self._encoding, + pretty_print=pretty, + ), + ).decode(self._encoding) + + "\n", + ) + + 
def writeDataEntry( + self, + maker: builder.ElementMaker, # noqa: ARG002 + entry: EntryType, + ) -> None: + entry.save(self._resDir) + # TODO: create article tag with "definition-r" in it? + # or just save the file to res/ directory? or both? + # article = maker.article( + # maker.key(entry.s_word), + # maker.definition_r( + # ET.CDATA(entry.defi), + # **{"type": ext}) + # ) + # ) + + def write(self) -> Generator[None, EntryType, None]: + from lxml import builder + from lxml import etree as ET + + file = self._file + encoding = self._encoding + maker = builder.ElementMaker() + + file.write( + """ + +""", + ) + + self.writeInfo(maker, pretty=True) + + if not isdir(self._resDir): + os.mkdir(self._resDir) + + pretty = True + while True: + entry = yield + if entry is None: + break + if entry.isData(): + self.writeDataEntry(maker, entry) + continue + entry.detectDefiFormat() + article = maker.article( + maker.key(entry.l_word[0]), + ) + for alt in entry.l_word[1:]: + article.append(maker.synonym(alt)) + article.append( + maker.definition( + ET.CDATA(entry.defi), + type=entry.defiFormat, + ), + ) + ET.indent(article, space="") + articleStr = cast( + "bytes", + ET.tostring( + article, + pretty_print=pretty, + encoding=encoding, + ), + ).decode(encoding) + # for some reason, "´k" becomes " ́k" (for example) # noqa: RUF003 + # stardict-text2bin tool also does this. + # https://en.wiktionary.org/wiki/%CB%88#Translingual + self._file.write(articleStr + "\n") + + file.write("") + + if not os.listdir(self._resDir): + os.rmdir(self._resDir) diff --git a/pyglossary/plugins/tabfile/__init__.py b/pyglossary/plugins/tabfile/__init__.py index f6324e1e5..a0939400b 100644 --- a/pyglossary/plugins/tabfile/__init__.py +++ b/pyglossary/plugins/tabfile/__init__.py @@ -2,28 +2,31 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -from pyglossary.compression import stdCompressions -from pyglossary.core import log from pyglossary.option import ( BoolOption, EncodingOption, FileSizeOption, Option, ) -from pyglossary.text_reader import TextGlossaryReader -from pyglossary.text_utils import ( - splitByBarUnescapeNTB, - unescapeNTB, -) - -if TYPE_CHECKING: - from collections.abc import Generator - - from pyglossary.glossary_types import EntryType, GlossaryType -__all__ = ["Reader"] +from .reader import Reader +from .writer import Writer + +__all__ = [ + "Reader", + "Writer", + "description", + "enable", + "extensionCreate", + "extensions", + "kind", + "lname", + "name", + "optionsProp", + "singleFile", + "website", + "wiki", +] enable = True lname = "tabfile" @@ -50,89 +53,3 @@ comment="Add headwords title to beginning of definition", ), } - - -class Reader(TextGlossaryReader): - @classmethod - def isInfoWord(cls, word: str) -> bool: - return word.startswith("#") - - @classmethod - def fixInfoWord(cls, word: str) -> str: - return word.lstrip("#") - - def nextBlock(self) -> tuple[str | list[str], str, None] | None: - if not self._file: - raise StopIteration - line = self.readline() - if not line: - raise StopIteration - line = line.rstrip("\n") - if not line: - return None - ### - word: str | list[str] - word, tab, defi = line.partition("\t") - if not tab: - log.warning( - f"Warning: line starting with {line[:10]!r} has no tab!", - ) - return None - ### - if self._glos.alts: - word = splitByBarUnescapeNTB(word) - if len(word) == 1: - word = word[0] - else: - word = unescapeNTB(word, bar=False) - ### - defi = unescapeNTB(defi) - ### - return word, defi, None - - -class Writer: - _encoding: str = 
"utf-8" - _enable_info: bool = True - _resources: bool = True - _file_size_approx: int = 0 - _word_title: bool = False - - compressions = stdCompressions - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - - def open( - self, - filename: str, - ) -> None: - self._filename = filename - - def finish(self) -> None: - pass - - def write(self) -> Generator[None, EntryType, None]: - from pyglossary.text_utils import escapeNTB, joinByBar - from pyglossary.text_writer import TextGlossaryWriter - - writer = TextGlossaryWriter( - self._glos, - entryFmt="{word}\t{defi}\n", - writeInfo=self._enable_info, - outInfoKeysAliasDict=None, - ) - writer.setAttrs( - encoding=self._encoding, - wordListEncodeFunc=joinByBar, - wordEscapeFunc=escapeNTB, - defiEscapeFunc=escapeNTB, - ext=".txt", - resources=self._resources, - word_title=self._word_title, - file_size_approx=self._file_size_approx, - ) - writer.open(self._filename) - yield from writer.write() - writer.finish() diff --git a/pyglossary/plugins/tabfile/reader.py b/pyglossary/plugins/tabfile/reader.py new file mode 100644 index 000000000..c834b288c --- /dev/null +++ b/pyglossary/plugins/tabfile/reader.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from pyglossary.core import log +from pyglossary.text_reader import TextGlossaryReader +from pyglossary.text_utils import ( + splitByBarUnescapeNTB, + unescapeNTB, +) + + +class Reader(TextGlossaryReader): + @classmethod + def isInfoWord(cls, word: str) -> bool: + return word.startswith("#") + + @classmethod + def fixInfoWord(cls, word: str) -> str: + return word.lstrip("#") + + def nextBlock(self) -> tuple[str | list[str], str, None] | None: + if not self._file: + raise StopIteration + line = self.readline() + if not line: + raise StopIteration + line = line.rstrip("\n") + if not line: + return None + ### + word: str | list[str] + word, tab, defi = line.partition("\t") + if not tab: + log.warning( + f"Warning: line starting with {line[:10]!r} has no tab!", + ) + return None + ### + if self._glos.alts: + word = splitByBarUnescapeNTB(word) + if len(word) == 1: + word = word[0] + else: + word = unescapeNTB(word, bar=False) + ### + defi = unescapeNTB(defi) + ### + return word, defi, None diff --git a/pyglossary/plugins/tabfile/writer.py b/pyglossary/plugins/tabfile/writer.py new file mode 100644 index 000000000..cbdf42fe8 --- /dev/null +++ b/pyglossary/plugins/tabfile/writer.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pyglossary.compression import stdCompressions + +if TYPE_CHECKING: + from collections.abc import Generator + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + _encoding: str = "utf-8" + _enable_info: bool = True + _resources: bool = True + _file_size_approx: int = 0 + _word_title: bool = False + + compressions = stdCompressions + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + + def open( + self, + filename: str, + ) -> None: + self._filename = filename + + def finish(self) -> None: + pass + + def write(self) -> Generator[None, EntryType, None]: + from pyglossary.text_utils import escapeNTB, joinByBar + from pyglossary.text_writer import TextGlossaryWriter + + writer = TextGlossaryWriter( + self._glos, + entryFmt="{word}\t{defi}\n", + writeInfo=self._enable_info, + outInfoKeysAliasDict=None, + ) + writer.setAttrs( + encoding=self._encoding, + 
wordListEncodeFunc=joinByBar, + wordEscapeFunc=escapeNTB, + defiEscapeFunc=escapeNTB, + ext=".txt", + resources=self._resources, + word_title=self._word_title, + file_size_approx=self._file_size_approx, + ) + writer.open(self._filename) + yield from writer.write() + writer.finish() diff --git a/pyglossary/plugins/testformat/__init__.py b/pyglossary/plugins/testformat/__init__.py index be780e5fc..a38dbd2e8 100644 --- a/pyglossary/plugins/testformat/__init__.py +++ b/pyglossary/plugins/testformat/__init__.py @@ -1,13 +1,11 @@ - - -from __future__ import annotations - # -*- coding: utf-8 -*- -from collections.abc import Generator, Iterator +from __future__ import annotations -from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option +from .reader import Reader +from .writer import Writer + __all__ = [ "Reader", "Writer", @@ -37,87 +35,3 @@ # key is option/argument name, value is instance of Option optionsProp: dict[str, Option] = {} - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._wordCount = 0 - - def __len__(self) -> int: - # return the number of entries if you have it - # if you don't, return 0 and progressbar will be disabled - # self._wordCount can be set in self.open function - # but if you want to set it, you should set it before - # iteration begins and __iter__ method is called - return self._wordCount - - def open(self, filename: str) -> None: - # open the file, read headers / info and set info to self._glos - # and set self._wordCount if you can - # read-options should be keyword arguments in this method - self._wordCount = 100 - # log.info(f"some useful message") - # here read info from file and set to Glossary object - self._glos.setInfo("name", "Test") - desc = "Test glossary created by a PyGlossary plugin" - self._glos.setInfo("description", desc) - self._glos.setInfo("author", "Me") - self._glos.setInfo("copyright", "GPL") - - def close(self) -> None: - # this is called after reading/conversion is finished - # if you have an open file object, close it here - # if you need to clean up temp files, do it here - pass - - def __iter__(self) -> Iterator[EntryType]: - # the easiest and simplest way to implement an Iterator is - # by writing a generator, by calling: yield glos.newEntry(word, defi) - # inside a loop (typically iterating over a file object for text file) - # another way (which is harder) is by implementing __next__ method - # and returning self in __iter__ - # that forces you to keep the state manually because __next__ is called - # repeatedly, but __iter__ is only called once - glos = self._glos - for i in range(self._wordCount): - # here get word and definition from file(depending on your format) - word = f"word_{i}" - defi = f"definition {i}" - yield glos.newEntry(word, defi) - - -class Writer: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - - def open(self, filename: str) -> None: - self._filename = filename - - def write(self) -> Generator[None, EntryType, None]: - glos = self._glos - filename = self._filename # noqa - # log.info(f"some useful message") - while True: - entry = yield - if entry is None: - break - if entry.isData(): - # can save it with entry.save(directory) - continue - word = entry.s_word # noqa - defi = entry.defi # noqa - # here write word and defi to the output file (depending on - # your format) - # here read info from Glossaey object - name = glos.getInfo("name") # noqa - desc = 
glos.getInfo("description") # noqa - author = glos.author # noqa - copyright = glos.getInfo("copyright") # noqa - # if an info key doesn't exist, getInfo returns empty string - # now write info to the output file (depending on your output format) - - def finish(self) -> None: - self._filename = "" diff --git a/pyglossary/plugins/testformat/reader.py b/pyglossary/plugins/testformat/reader.py new file mode 100644 index 000000000..7f59bcff3 --- /dev/null +++ b/pyglossary/plugins/testformat/reader.py @@ -0,0 +1,57 @@ + + +from __future__ import annotations + +# -*- coding: utf-8 -*- +from collections.abc import Iterator + +from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._wordCount = 0 + + def __len__(self) -> int: + # return the number of entries if you have it + # if you don't, return 0 and progressbar will be disabled + # self._wordCount can be set in self.open function + # but if you want to set it, you should set it before + # iteration begins and __iter__ method is called + return self._wordCount + + def open(self, filename: str) -> None: + # open the file, read headers / info and set info to self._glos + # and set self._wordCount if you can + # read-options should be keyword arguments in this method + self._wordCount = 100 + # log.info(f"some useful message") + # here read info from file and set to Glossary object + self._glos.setInfo("name", "Test") + desc = "Test glossary created by a PyGlossary plugin" + self._glos.setInfo("description", desc) + self._glos.setInfo("author", "Me") + self._glos.setInfo("copyright", "GPL") + + def close(self) -> None: + # this is called after reading/conversion is finished + # if you have an open file object, close it here + # if you need to clean up temp files, do it here + pass + + def __iter__(self) -> Iterator[EntryType]: + # the easiest and simplest way to implement an Iterator is + # by writing a generator, by calling: yield glos.newEntry(word, defi) + # inside a loop (typically iterating over a file object for text file) + # another way (which is harder) is by implementing __next__ method + # and returning self in __iter__ + # that forces you to keep the state manually because __next__ is called + # repeatedly, but __iter__ is only called once + glos = self._glos + for i in range(self._wordCount): + # here get word and definition from file(depending on your format) + word = f"word_{i}" + defi = f"definition {i}" + yield glos.newEntry(word, defi) diff --git a/pyglossary/plugins/testformat/writer.py b/pyglossary/plugins/testformat/writer.py new file mode 100644 index 000000000..48f18b227 --- /dev/null +++ b/pyglossary/plugins/testformat/writer.py @@ -0,0 +1,43 @@ + + +from __future__ import annotations + +# -*- coding: utf-8 -*- +from collections.abc import Generator + +from pyglossary.glossary_types import EntryType, GlossaryType + + +class Writer: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + + def open(self, filename: str) -> None: + self._filename = filename + + def write(self) -> Generator[None, EntryType, None]: + glos = self._glos + filename = self._filename # noqa + # log.info(f"some useful message") + while True: + entry = yield + if entry is None: + break + if entry.isData(): + # can save it with entry.save(directory) + continue + word = entry.s_word # noqa + defi = entry.defi # noqa + # here write word and defi to the output file (depending on + # your format) + 
# here read info from Glossaey object + name = glos.getInfo("name") # noqa + desc = glos.getInfo("description") # noqa + author = glos.author # noqa + copyright = glos.getInfo("copyright") # noqa + # if an info key doesn't exist, getInfo returns empty string + # now write info to the output file (depending on your output format) + + def finish(self) -> None: + self._filename = "" diff --git a/pyglossary/plugins/wiktextract/__init__.py b/pyglossary/plugins/wiktextract/__init__.py index 9fa987bb6..8c3ab13d1 100644 --- a/pyglossary/plugins/wiktextract/__init__.py +++ b/pyglossary/plugins/wiktextract/__init__.py @@ -1,26 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import collections -from collections import Counter -from io import BytesIO, IOBase -from json import loads as json_loads -from typing import TYPE_CHECKING, cast - -if TYPE_CHECKING: - from collections.abc import Callable, Iterator - from typing import Any - - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.lxml_types import Element, T_htmlfile - - -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import exc_note, log, pip -from pyglossary.io_utils import nullBinaryIO from pyglossary.option import ( BoolOption, ListOption, @@ -28,6 +8,8 @@ StrOption, ) +from .reader import Reader + __all__ = [ "Reader", "description", @@ -82,636 +64,3 @@ comment="Enable categories", ), } - - -class Reader: - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - _word_title: bool = False - _pron_color: str = "gray" - _gram_color: str = "green" - - # 'top right' or 'top right bottom left' - _example_padding: str = "10px 20px" - - _audio: bool = True - - _audio_formats: list[str] = ["ogg", "mp3"] - - _categories: bool = False - - topicStyle = ( - "color:white;" - "background:green;" - "padding-left:3px;" - "padding-right:3px;" - "border-radius:0.5ex;" - # 0.5ex ~= 0.3em, but "ex" is recommended - ) - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: IOBase = nullBinaryIO - self._fileSize = 0 - self._wordCount = 0 - - def open( - self, - filename: str, - ) -> None: - try: - pass - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install lxml` to install") - raise - - self._filename = filename - cfile = compressionOpen(filename, mode="rt", encoding="utf-8") - - if cfile.seekable(): - cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - self._glos.setInfo("input_file_size", str(self._fileSize)) - else: - self.warning("Wiktextract Reader: file is not seekable") - - self._glos.setDefaultDefiFormat("h") - - if self._word_title: - self._glos.setInfo("definition_has_headwords", "True") - - self._file = cfile - self._warnings: Counter[str] = collections.Counter() - - def close(self) -> None: - self._file.close() - self._file = nullBinaryIO - self._filename = "" - self._fileSize = 0 - - def __len__(self) -> int: - return 0 - - def __iter__(self) -> Iterator[EntryType]: - while line := self._file.readline(): - line = line.strip() - if not line: - continue - yield self.makeEntry(json_loads(line)) - for _msg, count in self._warnings.most_common(): - msg = _msg - if count > 1: - msg = f"[{count} times] {msg}" - log.warning(msg) - - def warning(self, msg: str) -> None: - self._warnings[msg] += 1 - - def makeEntry(self, data: dict[str, Any]) -> EntryType: # noqa: PLR0912 - from lxml import etree as ET - - glos = self._glos - f = BytesIO() - - def br() -> Element: - 
return ET.Element("br") - - keywords: list[str] = [] - inflectedKeywords: list[str] = [] - - word = data.get("word") - if word: - keywords.append(word) - - for formDict in data.get("forms", []): - form: str = formDict.get("form", "") - if not form: - continue - if len(form) > 80: - self.warning(f"'form' too long: {form}") - continue - source: str = formDict.get("source", "") - # tags = formDict.get("tags", []) - if source == "Inflection": - inflectedKeywords.append(form) - else: - keywords.append(form) - - keywords += inflectedKeywords - - with ET.htmlfile(f, encoding="utf-8") as hf: - with hf.element("div"): - if self._word_title: - for keyword in keywords: - with hf.element(glos.titleTag(keyword)): - hf.write(keyword) - hf.write(br()) - - hf_ = cast("T_htmlfile", hf) - - self.writeSoundList(hf_, data.get("sounds")) - - pos: str | None = data.get("pos") - if pos: - with hf.element("div", attrib={"class": "pos"}): - with hf.element("font", color=self._gram_color): - hf.write(pos) - - senses = data.get("senses") or [] - - self.writeSenseList(hf_, senses) # type: ignore - - self.writeSynonyms(hf_, data.get("synonyms")) # type: ignore - - self.writeAntonyms(hf_, data.get("antonyms")) # type: ignore - - # TODO: data.get("translations") - # list[dict[str, str]] - # dict keys: code, "lang", "sense", "word" - - etymology: str = data.get("etymology_text", "") - if etymology: - hf.write(br()) - with hf.element("div"): - hf.write(f"Etymology: {etymology}") - - if self._categories: - categories = [] - for sense in senses: - senseCats = sense.get("categories") - if senseCats: - categories += senseCats - self.writeSenseCategories(hf_, categories) - - defi = f.getvalue().decode("utf-8") - # defi = defi.replace("\xa0", " ") # do we need to do this? - file = self._file - return self._glos.newEntry( - keywords, - defi, - defiFormat="h", - byteProgress=(file.tell(), self._fileSize), - ) - - def writeSoundPron( - self, - hf: T_htmlfile, - sound: dict[str, Any], - ) -> None: - # "homophone" key found in Dutch and Arabic dictionaries - # (similar-sounding words for Arabic) - for key in ("ipa", "other", "rhymes", "homophone"): - value = sound.get(key) - if not value: - continue - with hf.element("font", color=self._pron_color): - hf.write(str(value)) - hf.write(f" ({key})") - - def writeSoundAudio( - self, - hf: T_htmlfile, - sound: dict[str, Any], - ) -> None: - # TODO: add a read-option for audio - # keys for audio: - # "audio" (file name), "text" (link text), "ogg_url", "mp3_url" - # possible "tags" (list[str]) - - text = sound.get("text") - if text: - hf.write(f"{text}: ") - with hf.element("audio", attrib={"controls": ""}): - for _format in self._audio_formats: - url = sound.get(f"{_format}_url") - if not url: - continue - with hf.element( - "source", - attrib={ - "src": url, - "type": f"audio/{_format}", - }, - ): - pass - - def writeSoundList( - self, - hf: T_htmlfile, - soundList: list[dict[str, Any]] | None, - ) -> None: - if not soundList: - return - - pronList: list[dict[str, Any]] = [] - audioList: list[dict[str, Any]] = [] - - for sound in soundList: - if "audio" in sound: - if self._audio: - audioList.append(sound) - continue - pronList.append(sound) - # can it contain both audio and pronunciation? 
- - if pronList: - with hf.element("div", attrib={"class": "pronunciations"}): - for i, sound in enumerate(pronList): - if i > 0: - hf.write(", ") - self.writeSoundPron(hf, sound) - - for sound in audioList: - with hf.element("div", attrib={"class": "audio"}): - self.writeSoundAudio(hf, sound) - - def writeSenseList( - self, - hf: T_htmlfile, - senseList: list[dict[str, Any]], - ) -> None: - if not senseList: - return - - self.makeList( - hf, - senseList, - self.writeSense, - ) - - def writeSenseGloss( # noqa: PLR6301 - self, - hf: T_htmlfile, - text: str | None, - ) -> None: - hf.write(text or "") - - def writeSenseCategory( # noqa: PLR6301 - self, - hf: T_htmlfile, - category: dict[str, Any], - ) -> None: - # keys: name: str, kind: str, parents: list, source: str - # values for "source" (that I found): "w", "w+disamb" - name = category.get("name") - if not name: - self.warning(f"{category = }") - return - desc = name - source = category.get("source") - if source: - desc = f"{desc} (source: {source})" - hf.write(desc) - - def writeSenseCategories( - self, - hf: T_htmlfile, - categories: list[dict[str, Any]] | None, - ) -> None: - if not categories: - return - # long names, mostly about grammar? - with hf.element("div", attrib={"class": "categories"}): - hf.write("Categories: ") - self.makeList(hf, categories, self.writeSenseCategory) - - def writeSenseExample( # noqa: PLR6301, PLR0912 - self, - hf: T_htmlfile, - example: dict[str, str | list], - ) -> None: - # example keys: text, "english", "ref", "type" - textList: list[tuple[str | None, str]] = [] - text_: str | list = example.pop("example", "") - if text_: - assert isinstance(text_, str) - textList.append((None, text_)) - - example.pop("ref", "") - example.pop("type", "") - - for key, value in example.items(): - if not value: - continue - prefix: str | None = key - if prefix in ("text",): # noqa: PLR6201, FURB171 - prefix = None - if isinstance(value, str): - textList.append((prefix, value)) - elif isinstance(value, list): - for item in value: - if isinstance(item, str): - textList.append((prefix, item)) - elif isinstance(item, list): - textList += [(prefix, item2) for item2 in item] - else: - log.error(f"writeSenseExample: invalid type for {value=}") - - if not textList: - return - - def writePair(prefix: str | None, text: str) -> None: - if prefix: - with hf.element("b"): - hf.write(prefix) - hf.write(": ") - hf.write(text) - - if len(textList) == 1: - prefix, text = textList[0] - writePair(prefix, text) - return - - with hf.element("ul"): - for prefix, text in textList: - with hf.element("li"): - writePair(prefix, text) - - def writeSenseExamples( - self, - hf: T_htmlfile, - examples: list[dict[str, str | list]] | None, - ) -> None: - from lxml import etree as ET - - if not examples: - return - hf.write(ET.Element("br")) - with hf.element("div", attrib={"class": "examples"}): - hf.write("Examples:") - hf.write(ET.Element("br")) - for example in examples: - with hf.element( - "div", - attrib={ - "class": "example", - "style": f"padding: {self._example_padding};", - }, - ): - self.writeSenseExample(hf, example) - - def writeSenseFormOf( # noqa: PLR6301 - self, - hf: T_htmlfile, - form_of: dict[str, str], - ) -> None: - from lxml import etree as ET - - # {"word": ..., "extra": ...} - word = form_of.get("word") - if not word: - return - hf.write(word) - extra = form_of.get("extra") - if extra: - hf.write(ET.Element("br")) - hf.write(extra) - - def writeSenseFormOfList( - self, - hf: T_htmlfile, - form_of_list: list[dict[str, str]] | 
None, - ) -> None: - if not form_of_list: - return - with hf.element("div", attrib={"class": "form_of"}): - hf.write("Form of: ") - self.makeList(hf, form_of_list, self.writeSenseFormOf) - - def writeTags( - self, - hf: T_htmlfile, - tags: list[str] | None, - toRemove: list[str] | None, - ) -> None: - if not tags: - return - - if toRemove: - for tag in toRemove: - if tag in tags: - tags.remove(tag) - if not tags: - return - - with hf.element("div", attrib={"class": "tags"}): - for i, tag in enumerate(tags): - if i > 0: - hf.write(", ") - with hf.element("font", color=self._gram_color): - hf.write(tag) - - def writeTopics( - self, - hf: T_htmlfile, - topics: list[str] | None, - ) -> None: - if not topics: - return - - with hf.element("div", attrib={"class": "tags"}): - for i, topic in enumerate(topics): - if i > 0: - hf.write(" ") - with hf.element("span", style=self.topicStyle): - hf.write(topic) - - def addWordLink( # noqa: PLR6301 - self, - hf: T_htmlfile, - word: str, - wordClass: str = "", - ) -> None: - i = word.find(" [") - if i >= 0: - word = word[:i] - if not word: - return - attrib = {"href": f"bword://{word}"} - if wordClass: - attrib["class"] = wordClass - with hf.element( - "a", - attrib=attrib, - ): - hf.write(word) - - def writeSynonyms( - self, - hf: T_htmlfile, - synonyms: list[dict[str, Any]] | None, - ) -> None: - if not synonyms: - return - - # "word": "str", - # "sense": "str", - # "_dis1": "str", - # "tags": list[str] - # "extra": "str", - # "english": "str" - - with hf.element("div"): - hf.write("Synonyms: ") - for i, item in enumerate(synonyms): - if i > 0: - hf.write(", ") - word = item.get("word") - if not word: - continue - self.addWordLink(hf, word) - - def writeAntonyms( - self, - hf: T_htmlfile, - antonyms: list[dict[str, str]] | None, - ) -> None: - if not antonyms: - return - # dict keys: word - with hf.element("div"): - hf.write("Antonyms: ") - for i, item in enumerate(antonyms): - if i > 0: - hf.write(", ") - word = item.get("word") - if not word: - continue - self.addWordLink(hf, word, wordClass="antonym") - - def writeRelated( - self, - hf: T_htmlfile, - relatedList: list[dict[str, str]] | None, - ) -> None: - if not relatedList: - return - # dict keys: sense, "word", "english" - with hf.element("div"): - hf.write("Related: ") - for i, item in enumerate(relatedList): - if i > 0: - hf.write(", ") - word = item.get("word") - if not word: - continue - self.addWordLink(hf, word) - - def writeSenseLinks( - self, - hf: T_htmlfile, - linkList: list[list[str]] | None, - ) -> None: - if not linkList: - return - with hf.element("div"): - hf.write("Links: ") - for i, link in enumerate(linkList): - if len(link) != 2: - self.warning(f"unexpected {link =}") - continue - text, ref = link - sq = ref.find("#") - if sq == 0: - ref = text - elif sq > 0: - ref = ref[:sq] - if i > 0: - hf.write(", ") - self.addWordLink(hf, ref) - - def writeSense( - self, - hf: T_htmlfile, - sense: dict[str, Any], - ) -> None: - from lxml import etree as ET - - # tags seem to be mostly about grammar, so with format it like grammar - self.writeTags( - hf, - sense.get("tags"), - toRemove=["form-of"], - ) - - # for key in ("english",): - # text: "str | None" = sense.get("english") - # if not text: - # continue - # keyCap = key.capitalize() - # with hf.element("div"): - # with hf.element("b"): - # hf.write(keyCap) - # hf.write(f": {text}") - - # sense["glosses"] and sense["english"] seems to be unreliable - # for example: - # "raw_glosses": ["(short) story, fable, play"], - # "english": "short", 
- # "glosses": ["story, fable, play"], - - glosses: list[str] | None = sense.get("raw_glosses") - if not glosses: - glosses = sense.get("glosses") - if glosses: - self.makeList(hf, glosses, self.writeSenseGloss) - - self.writeTopics(hf, sense.get("topics")) - - self.writeSenseFormOfList(hf, sense.get("form_of")) - - self.writeSynonyms(hf, sense.get("synonyms")) - - self.writeAntonyms(hf, sense.get("antonyms")) - - self.writeRelated(hf, sense.get("related")) - - self.writeSenseLinks(hf, sense.get("links")) - - self.writeSenseExamples(hf, sense.get("examples")) - - # alt_of[i]["word"] seem to point to a word that is - # mentioned in sense["raw_glosses"] - # so we could try to find that word and turn it into a link - # sense.get("alt_of"): list[dict[str, str]] | None - - # sense.get("wikipedia", []): list[str] - # sense.get("wikidata", []): list[str] - # sense.get("id", ""): str # not useful - # sense.get("senseid", []): list[str] # not useful - - hf.write(ET.Element("br")) - - @staticmethod - def makeList( # noqa: PLR0913 - hf: T_htmlfile, - input_objects: list[Any], - processor: Callable, - ordered: bool = True, - skip_single: bool = True, - # single_prefix: str = "", - # list_type: str = "", - ) -> None: - """Wrap elements into
                        if more than one element.""" - if not input_objects: - return - - if skip_single and len(input_objects) == 1: - # if single_prefix: - # hf.write(single_prefix) - processor(hf, input_objects[0]) - return - - attrib: dict[str, str] = {} - # if list_type: - # attrib["type"] = list_type - - with hf.element("ol" if ordered else "ul", attrib=attrib): - for el in input_objects: - with hf.element("li"): - processor(hf, el) diff --git a/pyglossary/plugins/wiktextract/reader.py b/pyglossary/plugins/wiktextract/reader.py new file mode 100644 index 000000000..a029edd19 --- /dev/null +++ b/pyglossary/plugins/wiktextract/reader.py @@ -0,0 +1,656 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import collections +from collections import Counter +from io import BytesIO, IOBase +from json import loads as json_loads +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + from typing import Any + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.lxml_types import Element, T_htmlfile + + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import exc_note, log, pip +from pyglossary.io_utils import nullBinaryIO + + +class Reader: + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + _word_title: bool = False + _pron_color: str = "gray" + _gram_color: str = "green" + + # 'top right' or 'top right bottom left' + _example_padding: str = "10px 20px" + + _audio: bool = True + + _audio_formats: list[str] = ["ogg", "mp3"] + + _categories: bool = False + + topicStyle = ( + "color:white;" + "background:green;" + "padding-left:3px;" + "padding-right:3px;" + "border-radius:0.5ex;" + # 0.5ex ~= 0.3em, but "ex" is recommended + ) + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: IOBase = nullBinaryIO + self._fileSize = 0 + self._wordCount = 0 + + def open( + self, + filename: str, + ) -> None: + try: + pass + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install lxml` to install") + raise + + self._filename = filename + cfile = compressionOpen(filename, mode="rt", encoding="utf-8") + + if cfile.seekable(): + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + self._glos.setInfo("input_file_size", str(self._fileSize)) + else: + self.warning("Wiktextract Reader: file is not seekable") + + self._glos.setDefaultDefiFormat("h") + + if self._word_title: + self._glos.setInfo("definition_has_headwords", "True") + + self._file = cfile + self._warnings: Counter[str] = collections.Counter() + + def close(self) -> None: + self._file.close() + self._file = nullBinaryIO + self._filename = "" + self._fileSize = 0 + + def __len__(self) -> int: + return 0 + + def __iter__(self) -> Iterator[EntryType]: + while line := self._file.readline(): + line = line.strip() + if not line: + continue + yield self.makeEntry(json_loads(line)) + for _msg, count in self._warnings.most_common(): + msg = _msg + if count > 1: + msg = f"[{count} times] {msg}" + log.warning(msg) + + def warning(self, msg: str) -> None: + self._warnings[msg] += 1 + + def makeEntry(self, data: dict[str, Any]) -> EntryType: # noqa: PLR0912 + from lxml import etree as ET + + glos = self._glos + f = BytesIO() + + def br() -> Element: + return ET.Element("br") + + keywords: list[str] = [] + inflectedKeywords: list[str] = [] + + word = data.get("word") + if word: + keywords.append(word) + + for formDict in 
data.get("forms", []): + form: str = formDict.get("form", "") + if not form: + continue + if len(form) > 80: + self.warning(f"'form' too long: {form}") + continue + source: str = formDict.get("source", "") + # tags = formDict.get("tags", []) + if source == "Inflection": + inflectedKeywords.append(form) + else: + keywords.append(form) + + keywords += inflectedKeywords + + with ET.htmlfile(f, encoding="utf-8") as hf: + with hf.element("div"): + if self._word_title: + for keyword in keywords: + with hf.element(glos.titleTag(keyword)): + hf.write(keyword) + hf.write(br()) + + hf_ = cast("T_htmlfile", hf) + + self.writeSoundList(hf_, data.get("sounds")) + + pos: str | None = data.get("pos") + if pos: + with hf.element("div", attrib={"class": "pos"}): + with hf.element("font", color=self._gram_color): + hf.write(pos) + + senses = data.get("senses") or [] + + self.writeSenseList(hf_, senses) # type: ignore + + self.writeSynonyms(hf_, data.get("synonyms")) # type: ignore + + self.writeAntonyms(hf_, data.get("antonyms")) # type: ignore + + # TODO: data.get("translations") + # list[dict[str, str]] + # dict keys: code, "lang", "sense", "word" + + etymology: str = data.get("etymology_text", "") + if etymology: + hf.write(br()) + with hf.element("div"): + hf.write(f"Etymology: {etymology}") + + if self._categories: + categories = [] + for sense in senses: + senseCats = sense.get("categories") + if senseCats: + categories += senseCats + self.writeSenseCategories(hf_, categories) + + defi = f.getvalue().decode("utf-8") + # defi = defi.replace("\xa0", " ") # do we need to do this? + file = self._file + return self._glos.newEntry( + keywords, + defi, + defiFormat="h", + byteProgress=(file.tell(), self._fileSize), + ) + + def writeSoundPron( + self, + hf: T_htmlfile, + sound: dict[str, Any], + ) -> None: + # "homophone" key found in Dutch and Arabic dictionaries + # (similar-sounding words for Arabic) + for key in ("ipa", "other", "rhymes", "homophone"): + value = sound.get(key) + if not value: + continue + with hf.element("font", color=self._pron_color): + hf.write(str(value)) + hf.write(f" ({key})") + + def writeSoundAudio( + self, + hf: T_htmlfile, + sound: dict[str, Any], + ) -> None: + # TODO: add a read-option for audio + # keys for audio: + # "audio" (file name), "text" (link text), "ogg_url", "mp3_url" + # possible "tags" (list[str]) + + text = sound.get("text") + if text: + hf.write(f"{text}: ") + with hf.element("audio", attrib={"controls": ""}): + for _format in self._audio_formats: + url = sound.get(f"{_format}_url") + if not url: + continue + with hf.element( + "source", + attrib={ + "src": url, + "type": f"audio/{_format}", + }, + ): + pass + + def writeSoundList( + self, + hf: T_htmlfile, + soundList: list[dict[str, Any]] | None, + ) -> None: + if not soundList: + return + + pronList: list[dict[str, Any]] = [] + audioList: list[dict[str, Any]] = [] + + for sound in soundList: + if "audio" in sound: + if self._audio: + audioList.append(sound) + continue + pronList.append(sound) + # can it contain both audio and pronunciation? 
+ + if pronList: + with hf.element("div", attrib={"class": "pronunciations"}): + for i, sound in enumerate(pronList): + if i > 0: + hf.write(", ") + self.writeSoundPron(hf, sound) + + for sound in audioList: + with hf.element("div", attrib={"class": "audio"}): + self.writeSoundAudio(hf, sound) + + def writeSenseList( + self, + hf: T_htmlfile, + senseList: list[dict[str, Any]], + ) -> None: + if not senseList: + return + + self.makeList( + hf, + senseList, + self.writeSense, + ) + + def writeSenseGloss( # noqa: PLR6301 + self, + hf: T_htmlfile, + text: str | None, + ) -> None: + hf.write(text or "") + + def writeSenseCategory( # noqa: PLR6301 + self, + hf: T_htmlfile, + category: dict[str, Any], + ) -> None: + # keys: name: str, kind: str, parents: list, source: str + # values for "source" (that I found): "w", "w+disamb" + name = category.get("name") + if not name: + self.warning(f"{category = }") + return + desc = name + source = category.get("source") + if source: + desc = f"{desc} (source: {source})" + hf.write(desc) + + def writeSenseCategories( + self, + hf: T_htmlfile, + categories: list[dict[str, Any]] | None, + ) -> None: + if not categories: + return + # long names, mostly about grammar? + with hf.element("div", attrib={"class": "categories"}): + hf.write("Categories: ") + self.makeList(hf, categories, self.writeSenseCategory) + + def writeSenseExample( # noqa: PLR6301, PLR0912 + self, + hf: T_htmlfile, + example: dict[str, str | list], + ) -> None: + # example keys: text, "english", "ref", "type" + textList: list[tuple[str | None, str]] = [] + text_: str | list = example.pop("example", "") + if text_: + assert isinstance(text_, str) + textList.append((None, text_)) + + example.pop("ref", "") + example.pop("type", "") + + for key, value in example.items(): + if not value: + continue + prefix: str | None = key + if prefix in ("text",): # noqa: PLR6201, FURB171 + prefix = None + if isinstance(value, str): + textList.append((prefix, value)) + elif isinstance(value, list): + for item in value: + if isinstance(item, str): + textList.append((prefix, item)) + elif isinstance(item, list): + textList += [(prefix, item2) for item2 in item] + else: + log.error(f"writeSenseExample: invalid type for {value=}") + + if not textList: + return + + def writePair(prefix: str | None, text: str) -> None: + if prefix: + with hf.element("b"): + hf.write(prefix) + hf.write(": ") + hf.write(text) + + if len(textList) == 1: + prefix, text = textList[0] + writePair(prefix, text) + return + + with hf.element("ul"): + for prefix, text in textList: + with hf.element("li"): + writePair(prefix, text) + + def writeSenseExamples( + self, + hf: T_htmlfile, + examples: list[dict[str, str | list]] | None, + ) -> None: + from lxml import etree as ET + + if not examples: + return + hf.write(ET.Element("br")) + with hf.element("div", attrib={"class": "examples"}): + hf.write("Examples:") + hf.write(ET.Element("br")) + for example in examples: + with hf.element( + "div", + attrib={ + "class": "example", + "style": f"padding: {self._example_padding};", + }, + ): + self.writeSenseExample(hf, example) + + def writeSenseFormOf( # noqa: PLR6301 + self, + hf: T_htmlfile, + form_of: dict[str, str], + ) -> None: + from lxml import etree as ET + + # {"word": ..., "extra": ...} + word = form_of.get("word") + if not word: + return + hf.write(word) + extra = form_of.get("extra") + if extra: + hf.write(ET.Element("br")) + hf.write(extra) + + def writeSenseFormOfList( + self, + hf: T_htmlfile, + form_of_list: list[dict[str, str]] | 
None, + ) -> None: + if not form_of_list: + return + with hf.element("div", attrib={"class": "form_of"}): + hf.write("Form of: ") + self.makeList(hf, form_of_list, self.writeSenseFormOf) + + def writeTags( + self, + hf: T_htmlfile, + tags: list[str] | None, + toRemove: list[str] | None, + ) -> None: + if not tags: + return + + if toRemove: + for tag in toRemove: + if tag in tags: + tags.remove(tag) + if not tags: + return + + with hf.element("div", attrib={"class": "tags"}): + for i, tag in enumerate(tags): + if i > 0: + hf.write(", ") + with hf.element("font", color=self._gram_color): + hf.write(tag) + + def writeTopics( + self, + hf: T_htmlfile, + topics: list[str] | None, + ) -> None: + if not topics: + return + + with hf.element("div", attrib={"class": "tags"}): + for i, topic in enumerate(topics): + if i > 0: + hf.write(" ") + with hf.element("span", style=self.topicStyle): + hf.write(topic) + + def addWordLink( # noqa: PLR6301 + self, + hf: T_htmlfile, + word: str, + wordClass: str = "", + ) -> None: + i = word.find(" [") + if i >= 0: + word = word[:i] + if not word: + return + attrib = {"href": f"bword://{word}"} + if wordClass: + attrib["class"] = wordClass + with hf.element( + "a", + attrib=attrib, + ): + hf.write(word) + + def writeSynonyms( + self, + hf: T_htmlfile, + synonyms: list[dict[str, Any]] | None, + ) -> None: + if not synonyms: + return + + # "word": "str", + # "sense": "str", + # "_dis1": "str", + # "tags": list[str] + # "extra": "str", + # "english": "str" + + with hf.element("div"): + hf.write("Synonyms: ") + for i, item in enumerate(synonyms): + if i > 0: + hf.write(", ") + word = item.get("word") + if not word: + continue + self.addWordLink(hf, word) + + def writeAntonyms( + self, + hf: T_htmlfile, + antonyms: list[dict[str, str]] | None, + ) -> None: + if not antonyms: + return + # dict keys: word + with hf.element("div"): + hf.write("Antonyms: ") + for i, item in enumerate(antonyms): + if i > 0: + hf.write(", ") + word = item.get("word") + if not word: + continue + self.addWordLink(hf, word, wordClass="antonym") + + def writeRelated( + self, + hf: T_htmlfile, + relatedList: list[dict[str, str]] | None, + ) -> None: + if not relatedList: + return + # dict keys: sense, "word", "english" + with hf.element("div"): + hf.write("Related: ") + for i, item in enumerate(relatedList): + if i > 0: + hf.write(", ") + word = item.get("word") + if not word: + continue + self.addWordLink(hf, word) + + def writeSenseLinks( + self, + hf: T_htmlfile, + linkList: list[list[str]] | None, + ) -> None: + if not linkList: + return + with hf.element("div"): + hf.write("Links: ") + for i, link in enumerate(linkList): + if len(link) != 2: + self.warning(f"unexpected {link =}") + continue + text, ref = link + sq = ref.find("#") + if sq == 0: + ref = text + elif sq > 0: + ref = ref[:sq] + if i > 0: + hf.write(", ") + self.addWordLink(hf, ref) + + def writeSense( + self, + hf: T_htmlfile, + sense: dict[str, Any], + ) -> None: + from lxml import etree as ET + + # tags seem to be mostly about grammar, so with format it like grammar + self.writeTags( + hf, + sense.get("tags"), + toRemove=["form-of"], + ) + + # for key in ("english",): + # text: "str | None" = sense.get("english") + # if not text: + # continue + # keyCap = key.capitalize() + # with hf.element("div"): + # with hf.element("b"): + # hf.write(keyCap) + # hf.write(f": {text}") + + # sense["glosses"] and sense["english"] seems to be unreliable + # for example: + # "raw_glosses": ["(short) story, fable, play"], + # "english": "short", 
+ # "glosses": ["story, fable, play"], + + glosses: list[str] | None = sense.get("raw_glosses") + if not glosses: + glosses = sense.get("glosses") + if glosses: + self.makeList(hf, glosses, self.writeSenseGloss) + + self.writeTopics(hf, sense.get("topics")) + + self.writeSenseFormOfList(hf, sense.get("form_of")) + + self.writeSynonyms(hf, sense.get("synonyms")) + + self.writeAntonyms(hf, sense.get("antonyms")) + + self.writeRelated(hf, sense.get("related")) + + self.writeSenseLinks(hf, sense.get("links")) + + self.writeSenseExamples(hf, sense.get("examples")) + + # alt_of[i]["word"] seem to point to a word that is + # mentioned in sense["raw_glosses"] + # so we could try to find that word and turn it into a link + # sense.get("alt_of"): list[dict[str, str]] | None + + # sense.get("wikipedia", []): list[str] + # sense.get("wikidata", []): list[str] + # sense.get("id", ""): str # not useful + # sense.get("senseid", []): list[str] # not useful + + hf.write(ET.Element("br")) + + @staticmethod + def makeList( # noqa: PLR0913 + hf: T_htmlfile, + input_objects: list[Any], + processor: Callable, + ordered: bool = True, + skip_single: bool = True, + # single_prefix: str = "", + # list_type: str = "", + ) -> None: + """Wrap elements into
                          if more than one element.""" + if not input_objects: + return + + if skip_single and len(input_objects) == 1: + # if single_prefix: + # hf.write(single_prefix) + processor(hf, input_objects[0]) + return + + attrib: dict[str, str] = {} + # if list_type: + # attrib["type"] = list_type + + with hf.element("ol" if ordered else "ul", attrib=attrib): + for el in input_objects: + with hf.element("li"): + processor(hf, el) diff --git a/pyglossary/plugins/wordnet/__init__.py b/pyglossary/plugins/wordnet/__init__.py index b9e9007e4..ad237d80f 100644 --- a/pyglossary/plugins/wordnet/__init__.py +++ b/pyglossary/plugins/wordnet/__init__.py @@ -1,33 +1,11 @@ # -*- coding: utf-8 -*- -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 3 -# as published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License -# for more details. -# -# Copyright (C) 2023 Saeed Rasooli -# Copyright (C) 2015 Igor Tkach -# -# This plugin is based on https://github.com/itkach/wordnet2slob from __future__ import annotations -import os -import re -import sys -from collections import defaultdict from typing import TYPE_CHECKING -from pyglossary.core import log +from .reader import Reader if TYPE_CHECKING: - import io - from collections.abc import Iterator - - from pyglossary.glossary_types import EntryType, GlossaryType from pyglossary.option import Option __all__ = [ @@ -61,303 +39,3 @@ # key is option/argument name, value is instance of Option optionsProp: dict[str, Option] = {} - -# original expression from -# http://stackoverflow.com/questions/694344/regular-expression-that-matches-between-quotes-containing-escaped-quotes -# "(?:[^\\"]+|\\.)*" -# some examples don't have closing quote which -# make the subn with this expression hang -# quotedTextPattern = re.compile(r'"(?:[^"]+|\.)*["|\n]') - -# make it a capturing group so that we can get rid of quotes -quotedTextPattern = re.compile(r'"([^"]+)"') - -refPattern = re.compile(r"`(\w+)'") - - -class SynSet: - def __init__(self, line: str | bytes) -> None: - self.line = line - if isinstance(line, bytes): - line = line.decode("utf-8") - meta, self.gloss = line.split("|") - self.meta_parts = meta.split() - - @property - def offset(self) -> int: - return int(self.meta_parts[0]) - - @property - def lex_filenum(self) -> str: - return self.meta_parts[1] - - @property - def ss_type(self) -> str: - return self.meta_parts[2] - - @property - def w_cnt(self) -> int: - return int(self.meta_parts[3], 16) - - @property - def words(self) -> list[str]: - return [self.meta_parts[4 + 2 * i].replace("_", " ") for i in range(self.w_cnt)] - - @property - def pointers(self) -> list[Pointer]: - p_cnt_index = 4 + 2 * self.w_cnt - p_cnt = self.meta_parts[p_cnt_index] - pointer_count = int(p_cnt) - start = p_cnt_index + 1 - return [ - Pointer(*self.meta_parts[start + i * 4 : start + (i + 1) * 4]) # type: ignore - for i in range(pointer_count) - ] - - def __repr__(self) -> str: - return f"SynSet({self.line!r})" - - -class PointerSymbols: - n = { - "!": "Antonyms", - "@": "Hypernyms", - "@i": "Instance hypernyms", - "~": "Hyponyms", - "~i": "Instance hyponyms", - "#m": "Member holonyms", - "#s": "Substance holonyms", - "#p": "Part holonyms", - "%m": "Member meronyms", - "%s": "Substance 
meronyms", - "%p": "Part meronyms", - "=": "Attributes", - "+": "Derivationally related forms", - ";c": "Domain of synset - TOPIC", - "-c": "Member of this domain - TOPIC", - ";r": "Domain of synset - REGION", - "-r": "Member of this domain - REGION", - ";u": "Domain of synset - USAGE", - "-u": "Member of this domain - USAGE", - "^": "Also see", - } - - v = { - "!": "Antonyms", - "@": "Hypernyms", - "~": "Hyponyms", - "*": "Entailments", - ">": "Cause", - "^": "Also see", - "$": "Verb group", - "+": "Derivationally related forms", - ";c": "Domain of synset - TOPIC", - ";r": "Domain of synset - REGION", - ";u": "Domain of synset - USAGE", - } - - a = s = { - "!": "Antonyms", - "+": "Derivationally related forms", - "&": "Similar to", - "<": "Participle of verb", - "\\": "Pertainyms", - "=": "Attributes", - "^": "Also see", - ";c": "Domain of synset - TOPIC", - ";r": "Domain of synset - REGION", - ";u": "Domain of synset - USAGE", - } - - r = { - "!": "Antonyms", - "\\": "Derived from adjective", - "+": "Derivationally related forms", - ";c": "Domain of synset - TOPIC", - ";r": "Domain of synset - REGION", - ";u": "Domain of synset - USAGE", - "^": "Also see", - } - - -class Pointer: - def __init__(self, symbol: str, offset: str, pos: str, source_target: str) -> None: - self.symbol = symbol - self.offset = int(offset) - self.pos = pos - self.source_target = source_target - self.source = int(source_target[:2], 16) - self.target = int(source_target[2:], 16) - - def __repr__(self) -> str: - return ( - f"Pointer({self.symbol!r}, {self.offset!r}, " - f"{self.pos!r}, {self.source_target!r})" - ) - - -class WordNet: - article_template = "

                          %s

                          %s" - synSetTypes = { - "n": "n.", - "v": "v.", - "a": "adj.", - "s": "adj. satellite", - "r": "adv.", - } - - file2pos = { - "data.adj": ["a", "s"], - "data.adv": ["r"], - "data.noun": ["n"], - "data.verb": ["v"], - } - - def __init__(self, wordnetdir: str) -> None: - self.wordnetdir = wordnetdir - self.collector: dict[str, list[str]] = defaultdict(list) - - @staticmethod - def iterlines(dict_dir: str) -> Iterator[str]: - for name in os.listdir(dict_dir): - if not name.startswith("data."): - continue - with open(os.path.join(dict_dir, name), encoding="utf-8") as f: - for line in f: - if not line.startswith(" "): - yield line - - # PLR0912 Too many branches (16 > 12) - def prepare(self) -> None: # noqa: PLR0912 - synSetTypes = self.synSetTypes - file2pos = self.file2pos - - dict_dir = self.wordnetdir - - files: dict[str, io.TextIOWrapper] = {} - for name in os.listdir(dict_dir): - if name.startswith("data.") and name in file2pos: - f = open(os.path.join(dict_dir, name), encoding="utf-8") # noqa: SIM115 - for key in file2pos[name]: - files[key] = f - - def a(word: str) -> str: - return f'{word}' - - for index, line in enumerate(self.iterlines(dict_dir)): - if index % 100 == 0 and index > 0: - sys.stdout.write(".") - sys.stdout.flush() - if index % 5000 == 0 and index > 0: - sys.stdout.write("\n") - sys.stdout.flush() - if not line or not line.strip(): - continue - synset = SynSet(line) - gloss_with_examples, _ = quotedTextPattern.subn( - lambda x: f'{x.group(1)}', - synset.gloss, - ) - gloss_with_examples, _ = refPattern.subn( - lambda x: a(x.group(1)), - gloss_with_examples, - ) - - words = synset.words - for index2, word in enumerate(words): - # TODO: move this block to a func - synonyms = ", ".join(a(w) for w in words if w != word) - synonyms_str = ( - f'
                          Synonyms: {synonyms}' - if synonyms - else "" - ) - pointers = defaultdict(list) - for pointer in synset.pointers: - if ( - pointer.source - and pointer.target - and pointer.source - 1 != index2 - ): - continue - symbol = pointer.symbol - if symbol and symbol[:1] in {";", "-"}: - continue - try: - symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol] - except KeyError: - log.warning( - f"unknown pointer symbol {symbol} for {synset.ss_type} ", - ) - symbol_desc = symbol - - data_file = files[pointer.pos] - data_file.seek(pointer.offset) - referenced_synset = SynSet(data_file.readline()) - if pointer.source == pointer.target == 0: - pointers[symbol_desc] = [ - w for w in referenced_synset.words if w not in words - ] - else: - referenced_word = referenced_synset.words[pointer.target - 1] - if referenced_word not in pointers[symbol_desc]: - pointers[symbol_desc].append(referenced_word) - - pointers_str = "".join( - [ - f'
                          {symbol_desc}: ' - + ", ".join(a(w) for w in referenced_words) - for symbol_desc, referenced_words in pointers.items() - if referenced_words - ], - ) - self.collector[word].append( - f'{synSetTypes[synset.ss_type]}' - f" {gloss_with_examples}{synonyms_str}{pointers_str}", - ) - sys.stdout.write("\n") - sys.stdout.flush() - - def process(self) -> Iterator[tuple[str, str]]: - article_template = self.article_template - - for title in self.collector: - article_pieces = self.collector[title] - article_pieces_count = len(article_pieces) - text = None - if article_pieces_count > 1: - ol = ["
                            "] + [f"
                          1. {ap}
                          2. " for ap in article_pieces] + ["
                          "] - text = article_template % (title, "".join(ol)) - elif article_pieces_count == 1: - text = article_template % (title, article_pieces[0]) - - if text: - yield title, text - - -class Reader: - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._wordCount = 0 - self.wordnet: WordNet | None = None - - def __len__(self) -> int: - return self._wordCount - - def open(self, filename: str) -> None: - self.wordnet = WordNet(filename) - log.info("Running wordnet.prepare()") - self.wordnet.prepare() - - # TODO: metadata - - def close(self) -> None: - self.wordnet = None - - def __iter__(self) -> Iterator[EntryType]: - if self.wordnet is None: - raise ValueError("self.wordnet is None") - glos = self._glos - for word, defi in self.wordnet.process(): - yield glos.newEntry(word, defi) diff --git a/pyglossary/plugins/wordnet/reader.py b/pyglossary/plugins/wordnet/reader.py new file mode 100644 index 000000000..434cafe2f --- /dev/null +++ b/pyglossary/plugins/wordnet/reader.py @@ -0,0 +1,330 @@ +# -*- coding: utf-8 -*- +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3 +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License +# for more details. +# +# Copyright (C) 2023 Saeed Rasooli +# Copyright (C) 2015 Igor Tkach +# +# This plugin is based on https://github.com/itkach/wordnet2slob +from __future__ import annotations + +import os +import re +import sys +from collections import defaultdict +from typing import TYPE_CHECKING + +from pyglossary.core import log + +if TYPE_CHECKING: + import io + from collections.abc import Iterator + + from pyglossary.glossary_types import EntryType, GlossaryType + +# original expression from +# http://stackoverflow.com/questions/694344/regular-expression-that-matches-between-quotes-containing-escaped-quotes +# "(?:[^\\"]+|\\.)*" +# some examples don't have closing quote which +# make the subn with this expression hang +# quotedTextPattern = re.compile(r'"(?:[^"]+|\.)*["|\n]') + +# make it a capturing group so that we can get rid of quotes +quotedTextPattern = re.compile(r'"([^"]+)"') + +refPattern = re.compile(r"`(\w+)'") + + +class SynSet: + def __init__(self, line: str | bytes) -> None: + self.line = line + if isinstance(line, bytes): + line = line.decode("utf-8") + meta, self.gloss = line.split("|") + self.meta_parts = meta.split() + + @property + def offset(self) -> int: + return int(self.meta_parts[0]) + + @property + def lex_filenum(self) -> str: + return self.meta_parts[1] + + @property + def ss_type(self) -> str: + return self.meta_parts[2] + + @property + def w_cnt(self) -> int: + return int(self.meta_parts[3], 16) + + @property + def words(self) -> list[str]: + return [self.meta_parts[4 + 2 * i].replace("_", " ") for i in range(self.w_cnt)] + + @property + def pointers(self) -> list[Pointer]: + p_cnt_index = 4 + 2 * self.w_cnt + p_cnt = self.meta_parts[p_cnt_index] + pointer_count = int(p_cnt) + start = p_cnt_index + 1 + return [ + Pointer(*self.meta_parts[start + i * 4 : start + (i + 1) * 4]) # type: ignore + for i in range(pointer_count) + ] + + def __repr__(self) -> str: + return f"SynSet({self.line!r})" + + +class PointerSymbols: + n = { + "!": "Antonyms", + "@": "Hypernyms", + "@i": 
"Instance hypernyms", + "~": "Hyponyms", + "~i": "Instance hyponyms", + "#m": "Member holonyms", + "#s": "Substance holonyms", + "#p": "Part holonyms", + "%m": "Member meronyms", + "%s": "Substance meronyms", + "%p": "Part meronyms", + "=": "Attributes", + "+": "Derivationally related forms", + ";c": "Domain of synset - TOPIC", + "-c": "Member of this domain - TOPIC", + ";r": "Domain of synset - REGION", + "-r": "Member of this domain - REGION", + ";u": "Domain of synset - USAGE", + "-u": "Member of this domain - USAGE", + "^": "Also see", + } + + v = { + "!": "Antonyms", + "@": "Hypernyms", + "~": "Hyponyms", + "*": "Entailments", + ">": "Cause", + "^": "Also see", + "$": "Verb group", + "+": "Derivationally related forms", + ";c": "Domain of synset - TOPIC", + ";r": "Domain of synset - REGION", + ";u": "Domain of synset - USAGE", + } + + a = s = { + "!": "Antonyms", + "+": "Derivationally related forms", + "&": "Similar to", + "<": "Participle of verb", + "\\": "Pertainyms", + "=": "Attributes", + "^": "Also see", + ";c": "Domain of synset - TOPIC", + ";r": "Domain of synset - REGION", + ";u": "Domain of synset - USAGE", + } + + r = { + "!": "Antonyms", + "\\": "Derived from adjective", + "+": "Derivationally related forms", + ";c": "Domain of synset - TOPIC", + ";r": "Domain of synset - REGION", + ";u": "Domain of synset - USAGE", + "^": "Also see", + } + + +class Pointer: + def __init__(self, symbol: str, offset: str, pos: str, source_target: str) -> None: + self.symbol = symbol + self.offset = int(offset) + self.pos = pos + self.source_target = source_target + self.source = int(source_target[:2], 16) + self.target = int(source_target[2:], 16) + + def __repr__(self) -> str: + return ( + f"Pointer({self.symbol!r}, {self.offset!r}, " + f"{self.pos!r}, {self.source_target!r})" + ) + + +class WordNet: + article_template = "

                          %s

                          %s" + synSetTypes = { + "n": "n.", + "v": "v.", + "a": "adj.", + "s": "adj. satellite", + "r": "adv.", + } + + file2pos = { + "data.adj": ["a", "s"], + "data.adv": ["r"], + "data.noun": ["n"], + "data.verb": ["v"], + } + + def __init__(self, wordnetdir: str) -> None: + self.wordnetdir = wordnetdir + self.collector: dict[str, list[str]] = defaultdict(list) + + @staticmethod + def iterlines(dict_dir: str) -> Iterator[str]: + for name in os.listdir(dict_dir): + if not name.startswith("data."): + continue + with open(os.path.join(dict_dir, name), encoding="utf-8") as f: + for line in f: + if not line.startswith(" "): + yield line + + # PLR0912 Too many branches (16 > 12) + def prepare(self) -> None: # noqa: PLR0912 + synSetTypes = self.synSetTypes + file2pos = self.file2pos + + dict_dir = self.wordnetdir + + files: dict[str, io.TextIOWrapper] = {} + for name in os.listdir(dict_dir): + if name.startswith("data.") and name in file2pos: + f = open(os.path.join(dict_dir, name), encoding="utf-8") # noqa: SIM115 + for key in file2pos[name]: + files[key] = f + + def a(word: str) -> str: + return f'{word}' + + for index, line in enumerate(self.iterlines(dict_dir)): + if index % 100 == 0 and index > 0: + sys.stdout.write(".") + sys.stdout.flush() + if index % 5000 == 0 and index > 0: + sys.stdout.write("\n") + sys.stdout.flush() + if not line or not line.strip(): + continue + synset = SynSet(line) + gloss_with_examples, _ = quotedTextPattern.subn( + lambda x: f'{x.group(1)}', + synset.gloss, + ) + gloss_with_examples, _ = refPattern.subn( + lambda x: a(x.group(1)), + gloss_with_examples, + ) + + words = synset.words + for index2, word in enumerate(words): + # TODO: move this block to a func + synonyms = ", ".join(a(w) for w in words if w != word) + synonyms_str = ( + f'
                          Synonyms: {synonyms}' + if synonyms + else "" + ) + pointers = defaultdict(list) + for pointer in synset.pointers: + if ( + pointer.source + and pointer.target + and pointer.source - 1 != index2 + ): + continue + symbol = pointer.symbol + if symbol and symbol[:1] in {";", "-"}: + continue + try: + symbol_desc = getattr(PointerSymbols, synset.ss_type)[symbol] + except KeyError: + log.warning( + f"unknown pointer symbol {symbol} for {synset.ss_type} ", + ) + symbol_desc = symbol + + data_file = files[pointer.pos] + data_file.seek(pointer.offset) + referenced_synset = SynSet(data_file.readline()) + if pointer.source == pointer.target == 0: + pointers[symbol_desc] = [ + w for w in referenced_synset.words if w not in words + ] + else: + referenced_word = referenced_synset.words[pointer.target - 1] + if referenced_word not in pointers[symbol_desc]: + pointers[symbol_desc].append(referenced_word) + + pointers_str = "".join( + [ + f'
                          {symbol_desc}: ' + + ", ".join(a(w) for w in referenced_words) + for symbol_desc, referenced_words in pointers.items() + if referenced_words + ], + ) + self.collector[word].append( + f'{synSetTypes[synset.ss_type]}' + f" {gloss_with_examples}{synonyms_str}{pointers_str}", + ) + sys.stdout.write("\n") + sys.stdout.flush() + + def process(self) -> Iterator[tuple[str, str]]: + article_template = self.article_template + + for title in self.collector: + article_pieces = self.collector[title] + article_pieces_count = len(article_pieces) + text = None + if article_pieces_count > 1: + ol = ["
                            "] + [f"
                          1. {ap}
                          2. " for ap in article_pieces] + ["
                          "] + text = article_template % (title, "".join(ol)) + elif article_pieces_count == 1: + text = article_template % (title, article_pieces[0]) + + if text: + yield title, text + + +class Reader: + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._wordCount = 0 + self.wordnet: WordNet | None = None + + def __len__(self) -> int: + return self._wordCount + + def open(self, filename: str) -> None: + self.wordnet = WordNet(filename) + log.info("Running wordnet.prepare()") + self.wordnet.prepare() + + # TODO: metadata + + def close(self) -> None: + self.wordnet = None + + def __iter__(self) -> Iterator[EntryType]: + if self.wordnet is None: + raise ValueError("self.wordnet is None") + glos = self._glos + for word, defi in self.wordnet.process(): + yield glos.newEntry(word, defi) diff --git a/pyglossary/plugins/wordset/__init__.py b/pyglossary/plugins/wordset/__init__.py index f2a0ce8b8..2cf426303 100644 --- a/pyglossary/plugins/wordset/__init__.py +++ b/pyglossary/plugins/wordset/__init__.py @@ -1,22 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import annotations -from json import load -from os import listdir -from os.path import isfile, join, splitext -from typing import TYPE_CHECKING - -from pyglossary.core import log from pyglossary.option import ( EncodingOption, Option, ) -if TYPE_CHECKING: - from collections.abc import Iterator - from typing import Any - - from pyglossary.glossary_types import EntryType, GlossaryType +from .reader import Reader __all__ = [ "Reader", @@ -49,85 +39,3 @@ optionsProp: dict[str, Option] = { "encoding": EncodingOption(), } - - -class Reader: - _encoding: str = "utf-8" - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._clear() - self.defiTemplate = ( - "

                          " - '{speech_part}' - "
                          " - "{def}" - "
                          " - "{example}" - "

                          " - ) - """ - { - "id": "492099d426", - "def": "without musical accompaniment", - "example": "they performed a cappella", - "speech_part": "adverb" - }, - """ - - def close(self) -> None: - self._clear() - - def _clear(self) -> None: - self._filename = "" - - def open(self, filename: str) -> None: - self._filename = filename - name = self._glos.getInfo("name") - if not name or name == "data": - self._glos.setInfo("name", "Wordset.org") - self._glos.setDefaultDefiFormat("h") - - def __len__(self) -> int: - return 0 - - @staticmethod - def fileNameSortKey(fname: str) -> str: - fname = splitext(fname)[0] - if fname == "misc": - return "\x80" - return fname - - @staticmethod - def sortKey(word: str) -> Any: - return word.lower().encode("utf-8", errors="replace") - - def __iter__(self) -> Iterator[EntryType]: - if not self._filename: - raise RuntimeError("iterating over a reader while it's not open") - - direc = self._filename - encoding = self._encoding - glos = self._glos - - for fname in sorted(listdir(direc), key=self.fileNameSortKey): - fpath = join(direc, fname) - if not (fname.endswith(".json") and isfile(fpath)): - continue - with open(fpath, encoding=encoding) as fileObj: - data: dict[str, dict[str, Any]] = load(fileObj) - for word in sorted(data, key=self.sortKey): - entryDict = data[word] - defi = "".join( - self.defiTemplate.format( - **{ - "word": word, - "def": meaning.get("def", ""), - "example": meaning.get("example", ""), - "speech_part": meaning.get("speech_part", ""), - }, - ) - for meaning in entryDict.get("meanings", []) - ) - yield glos.newEntry(word, defi, defiFormat="h") - log.info(f"finished reading {fname}") diff --git a/pyglossary/plugins/wordset/reader.py b/pyglossary/plugins/wordset/reader.py new file mode 100644 index 000000000..4c19b8b42 --- /dev/null +++ b/pyglossary/plugins/wordset/reader.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +from json import load +from os import listdir +from os.path import isfile, join, splitext +from typing import TYPE_CHECKING + +from pyglossary.core import log + +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any + + from pyglossary.glossary_types import EntryType, GlossaryType + + +class Reader: + _encoding: str = "utf-8" + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._clear() + self.defiTemplate = ( + "

                          " + '{speech_part}' + "
                          " + "{def}" + "
                          " + "{example}" + "

                          " + ) + """ + { + "id": "492099d426", + "def": "without musical accompaniment", + "example": "they performed a cappella", + "speech_part": "adverb" + }, + """ + + def close(self) -> None: + self._clear() + + def _clear(self) -> None: + self._filename = "" + + def open(self, filename: str) -> None: + self._filename = filename + name = self._glos.getInfo("name") + if not name or name == "data": + self._glos.setInfo("name", "Wordset.org") + self._glos.setDefaultDefiFormat("h") + + def __len__(self) -> int: + return 0 + + @staticmethod + def fileNameSortKey(fname: str) -> str: + fname = splitext(fname)[0] + if fname == "misc": + return "\x80" + return fname + + @staticmethod + def sortKey(word: str) -> Any: + return word.lower().encode("utf-8", errors="replace") + + def __iter__(self) -> Iterator[EntryType]: + if not self._filename: + raise RuntimeError("iterating over a reader while it's not open") + + direc = self._filename + encoding = self._encoding + glos = self._glos + + for fname in sorted(listdir(direc), key=self.fileNameSortKey): + fpath = join(direc, fname) + if not (fname.endswith(".json") and isfile(fpath)): + continue + with open(fpath, encoding=encoding) as fileObj: + data: dict[str, dict[str, Any]] = load(fileObj) + for word in sorted(data, key=self.sortKey): + entryDict = data[word] + defi = "".join( + self.defiTemplate.format( + **{ + "word": word, + "def": meaning.get("def", ""), + "example": meaning.get("example", ""), + "speech_part": meaning.get("speech_part", ""), + }, + ) + for meaning in entryDict.get("meanings", []) + ) + yield glos.newEntry(word, defi, defiFormat="h") + log.info(f"finished reading {fname}") diff --git a/pyglossary/plugins/xdxf/__init__.py b/pyglossary/plugins/xdxf/__init__.py index 0df1b3a51..e3e32a2e1 100644 --- a/pyglossary/plugins/xdxf/__init__.py +++ b/pyglossary/plugins/xdxf/__init__.py @@ -1,52 +1,12 @@ # -*- coding: utf-8 -*- -# xdxf/__init__.py from __future__ import annotations -"""xdxf file format reader and utils to convert xdxf to html.""" -# -# Copyright © 2023 Saeed Rasooli -# Copyright © 2016 ivan tkachenko me@ratijas.tk -# -# some parts of this file include code from: -# Aard Dictionary Tools . -# Copyright © 2008-2009 Igor Tkach -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -import re -import typing -from typing import TYPE_CHECKING, cast - -if TYPE_CHECKING: - import io - from collections.abc import Iterator, Sequence - - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.lxml_types import Element - -from lxml import etree as ET - -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import log -from pyglossary.io_utils import nullBinaryIO from pyglossary.option import ( BoolOption, Option, ) -from pyglossary.text_utils import toStr + +from .reader import Reader __all__ = [ "Reader", @@ -112,212 +72,3 @@ ... 
""" - -if TYPE_CHECKING: - - class TransformerType(typing.Protocol): - def transform(self, article: Element) -> str: ... - - -class Reader: - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - _html: bool = True - _xsl: bool = False - - infoKeyMap = { - "full_name": "name", - "full_title": "name", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: io.IOBase = nullBinaryIO - self._encoding = "utf-8" - self._htmlTr: TransformerType | None = None - self._re_span_k = re.compile( - '[^<>]*(
                          )?', - ) - - def makeTransformer(self) -> None: - if self._xsl: - from pyglossary.xdxf.xsl_transform import XslXdxfTransformer - - self._htmlTr = XslXdxfTransformer(encoding=self._encoding) - return - - from pyglossary.xdxf.transform import XdxfTransformer - - self._htmlTr = XdxfTransformer(encoding=self._encoding) - - def open(self, filename: str) -> None: # noqa: PLR0912 - # - - self._filename = filename - if self._html: - self.makeTransformer() - self._glos.setDefaultDefiFormat("h") - else: - self._glos.setDefaultDefiFormat("x") - - cfile = self._file = cast( - "io.IOBase", - compressionOpen( - self._filename, - mode="rb", - ), - ) - - context = ET.iterparse( # type: ignore - cfile, - events=("end",), - ) - for _, _elem in context: - elem = cast("Element", _elem) - if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: - break - # every other tag before or is considered info - if elem.tag == "abbr_def": - continue - # in case of multiple or multiple tags, the last one - # will be stored. - # Very few formats support more than one language pair in their - # metadata, so it's not very useful to have multiple - if elem.tag == "from": - for key, value in elem.attrib.items(): - if key.endswith("}lang"): - self._glos.sourceLangName = value.split("-")[0] - break - continue - if elem.tag == "to": - for key, value in elem.attrib.items(): - if key.endswith("}lang"): - self._glos.targetLangName = value.split("-")[0] - break - continue - if not elem.text: - if elem.tag != "br": - log.warning(f"empty tag <{elem.tag}>") - continue - key = self.infoKeyMap.get(elem.tag, elem.tag) - self._glos.setInfo(key, elem.text) - - del context - - if cfile.seekable(): - cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - self._glos.setInfo("input_file_size", str(self._fileSize)) - else: - log.warning("XDXF Reader: file is not seekable") - self._file.close() - self._file = compressionOpen(self._filename, mode="rb") - - def __len__(self) -> int: - return 0 - - def __iter__(self) -> Iterator[EntryType]: - context = ET.iterparse( # type: ignore - self._file, - events=("end",), - tag="ar", - ) - for _, _article in context: - article = cast("Element", _article) - article.tail = None - words = [toStr(w) for w in self.titles(article)] - if self._htmlTr: - defi = self._htmlTr.transform(article) - defiFormat = "h" - if len(words) == 1: - defi = self._re_span_k.sub("", defi) - else: - b_defi = cast("bytes", ET.tostring(article, encoding=self._encoding)) - defi = b_defi[4:-5].decode(self._encoding).strip() - defiFormat = "x" - - # log.info(f"{defi=}, {words=}") - yield self._glos.newEntry( - words, - defi, - defiFormat=defiFormat, - byteProgress=(self._file.tell(), self._fileSize), - ) - # clean up preceding siblings to save memory - # this can reduce memory usage from 1 GB to ~25 MB - parent = article.getparent() - if parent is None: - continue - while article.getprevious() is not None: - del parent[0] - - def close(self) -> None: - self._file.close() - self._file = nullBinaryIO - - @staticmethod - def tostring( - elem: Element, - ) -> str: - return ( - ET.tostring( - elem, - method="html", - pretty_print=True, - ) - .decode("utf-8") - .strip() - ) - - def titles(self, article: Element) -> list[str]: - """ - :param article: tag - :return: (title (str) | None, alternative titles (set)) - """ - from itertools import combinations - - titles: list[str] = [] - for title_element in article.findall("k"): - if title_element.text is None: - # TODO: look for tag? 
- log.warning(f"empty title element: {self.tostring(title_element)}") - continue - n_opts = len([c for c in title_element if c.tag == "opt"]) - if n_opts: - titles += [ - self._mktitle(title_element, comb) - for j in range(n_opts + 1) - for comb in combinations(list(range(n_opts)), j) - ] - else: - titles.append(self._mktitle(title_element)) - - return titles - - def _mktitle( # noqa: PLR6301 - self, - title_element: Element, - include_opts: Sequence | None = None, - ) -> str: - if include_opts is None: - include_opts = () - title = title_element.text or "" - opt_i = -1 - for c in title_element: - if c.tag == "nu" and c.tail: - if title: - title += c.tail - else: - title = c.tail - if c.tag == "opt" and c.text is not None: - opt_i += 1 - if opt_i in include_opts: - title += c.text - if c.tail: - title += c.tail - return title.strip() diff --git a/pyglossary/plugins/xdxf/reader.py b/pyglossary/plugins/xdxf/reader.py new file mode 100644 index 000000000..194d26d73 --- /dev/null +++ b/pyglossary/plugins/xdxf/reader.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2023 Saeed Rasooli +# Copyright © 2016 ivan tkachenko me@ratijas.tk +# +# some parts of this file include code from: +# Aard Dictionary Tools . +# Copyright © 2008-2009 Igor Tkach +# +# This program is a free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# You can get a copy of GNU General Public License along this program +# But you can always get it from http://www.gnu.org/licenses/gpl.txt +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +from __future__ import annotations + +import re +import typing +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import io + from collections.abc import Iterator, Sequence + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.lxml_types import Element + +from lxml import etree as ET + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import log +from pyglossary.io_utils import nullBinaryIO +from pyglossary.text_utils import toStr + +if TYPE_CHECKING: + + class TransformerType(typing.Protocol): + def transform(self, article: Element) -> str: ... + + +class Reader: + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + _html: bool = True + _xsl: bool = False + + infoKeyMap = { + "full_name": "name", + "full_title": "name", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: io.IOBase = nullBinaryIO + self._encoding = "utf-8" + self._htmlTr: TransformerType | None = None + self._re_span_k = re.compile( + '[^<>]*(
                          )?', + ) + + def makeTransformer(self) -> None: + if self._xsl: + from pyglossary.xdxf.xsl_transform import XslXdxfTransformer + + self._htmlTr = XslXdxfTransformer(encoding=self._encoding) + return + + from pyglossary.xdxf.transform import XdxfTransformer + + self._htmlTr = XdxfTransformer(encoding=self._encoding) + + def open(self, filename: str) -> None: # noqa: PLR0912 + # + + self._filename = filename + if self._html: + self.makeTransformer() + self._glos.setDefaultDefiFormat("h") + else: + self._glos.setDefaultDefiFormat("x") + + cfile = self._file = cast( + "io.IOBase", + compressionOpen( + self._filename, + mode="rb", + ), + ) + + context = ET.iterparse( # type: ignore + cfile, + events=("end",), + ) + for _, _elem in context: + elem = cast("Element", _elem) + if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: + break + # every other tag before or
                          is considered info + if elem.tag == "abbr_def": + continue + # in case of multiple or multiple tags, the last one + # will be stored. + # Very few formats support more than one language pair in their + # metadata, so it's not very useful to have multiple + if elem.tag == "from": + for key, value in elem.attrib.items(): + if key.endswith("}lang"): + self._glos.sourceLangName = value.split("-")[0] + break + continue + if elem.tag == "to": + for key, value in elem.attrib.items(): + if key.endswith("}lang"): + self._glos.targetLangName = value.split("-")[0] + break + continue + if not elem.text: + if elem.tag != "br": + log.warning(f"empty tag <{elem.tag}>") + continue + key = self.infoKeyMap.get(elem.tag, elem.tag) + self._glos.setInfo(key, elem.text) + + del context + + if cfile.seekable(): + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + self._glos.setInfo("input_file_size", str(self._fileSize)) + else: + log.warning("XDXF Reader: file is not seekable") + self._file.close() + self._file = compressionOpen(self._filename, mode="rb") + + def __len__(self) -> int: + return 0 + + def __iter__(self) -> Iterator[EntryType]: + context = ET.iterparse( # type: ignore + self._file, + events=("end",), + tag="ar", + ) + for _, _article in context: + article = cast("Element", _article) + article.tail = None + words = [toStr(w) for w in self.titles(article)] + if self._htmlTr: + defi = self._htmlTr.transform(article) + defiFormat = "h" + if len(words) == 1: + defi = self._re_span_k.sub("", defi) + else: + b_defi = cast("bytes", ET.tostring(article, encoding=self._encoding)) + defi = b_defi[4:-5].decode(self._encoding).strip() + defiFormat = "x" + + # log.info(f"{defi=}, {words=}") + yield self._glos.newEntry( + words, + defi, + defiFormat=defiFormat, + byteProgress=(self._file.tell(), self._fileSize), + ) + # clean up preceding siblings to save memory + # this can reduce memory usage from 1 GB to ~25 MB + parent = article.getparent() + if parent is None: + continue + while article.getprevious() is not None: + del parent[0] + + def close(self) -> None: + self._file.close() + self._file = nullBinaryIO + + @staticmethod + def tostring( + elem: Element, + ) -> str: + return ( + ET.tostring( + elem, + method="html", + pretty_print=True, + ) + .decode("utf-8") + .strip() + ) + + def titles(self, article: Element) -> list[str]: + """ + :param article: tag + :return: (title (str) | None, alternative titles (set)) + """ + from itertools import combinations + + titles: list[str] = [] + for title_element in article.findall("k"): + if title_element.text is None: + # TODO: look for tag? 
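# [editor's illustration, not part of the patch] The __iter__ method above drops
# already-processed preceding siblings after each article ("del parent[0]"),
# which is what keeps lxml's iterparse memory roughly constant on large files.
# A self-contained sketch of that pattern on a tiny in-memory document:
import io
from lxml import etree as ET

xml = b"<xdxf>" + b"".join(
	f"<ar><k>w{i}</k> def {i}</ar>".encode() for i in range(3)
) + b"</xdxf>"

for _, ar in ET.iterparse(io.BytesIO(xml), events=("end",), tag="ar"):
	print(ar.findtext("k"))  # w0, w1, w2
	parent = ar.getparent()
	if parent is not None:
		while ar.getprevious() is not None:
			del parent[0]  # drop consumed siblings to bound memory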
+ log.warning(f"empty title element: {self.tostring(title_element)}") + continue + n_opts = len([c for c in title_element if c.tag == "opt"]) + if n_opts: + titles += [ + self._mktitle(title_element, comb) + for j in range(n_opts + 1) + for comb in combinations(list(range(n_opts)), j) + ] + else: + titles.append(self._mktitle(title_element)) + + return titles + + def _mktitle( # noqa: PLR6301 + self, + title_element: Element, + include_opts: Sequence | None = None, + ) -> str: + if include_opts is None: + include_opts = () + title = title_element.text or "" + opt_i = -1 + for c in title_element: + if c.tag == "nu" and c.tail: + if title: + title += c.tail + else: + title = c.tail + if c.tag == "opt" and c.text is not None: + opt_i += 1 + if opt_i in include_opts: + title += c.text + if c.tail: + title += c.tail + return title.strip() diff --git a/pyglossary/plugins/xdxf_css/__init__.py b/pyglossary/plugins/xdxf_css/__init__.py index df7d27333..a0972364f 100644 --- a/pyglossary/plugins/xdxf_css/__init__.py +++ b/pyglossary/plugins/xdxf_css/__init__.py @@ -1,54 +1,16 @@ # -*- coding: utf-8 -*- -# xdxf/__init__.py from __future__ import annotations -"""xdxf file format reader and utils to convert xdxf to html.""" -# -# Copyright © 2023 Saeed Rasooli -# Copyright © 2016 ivan tkachenko me@ratijas.tk -# -# some parts of this file include code from: -# Aard Dictionary Tools . -# Copyright © 2008-2009 Igor Tkach -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -import re -import typing -from os.path import join -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from pyglossary.option import BoolOption -if TYPE_CHECKING: - import io - from collections.abc import Iterator, Sequence +from .reader import Reader - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.lxml_types import Element +if TYPE_CHECKING: from pyglossary.option import Option -from lxml import etree as ET - -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import log, rootDir -from pyglossary.io_utils import nullBinaryIO -from pyglossary.text_utils import toStr - __all__ = [ "Reader", "description", @@ -110,241 +72,3 @@ ... """ - -if TYPE_CHECKING: - - class TransformerType(typing.Protocol): - def transform(self, article: Element) -> str: ... - - -class Reader: - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - _html: bool = True - - infoKeyMap = { - "full_name": "name", - "full_title": "name", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: io.IOBase = nullBinaryIO - self._encoding = "utf-8" - self._htmlTr: TransformerType | None = None - self._re_span_k = re.compile( - '[^<>]*(
                          )?', - ) - self._has_added_css: bool = False - self._has_added_js: bool = False - self._abbr_defs_js: bytes - - def makeTransformer(self) -> None: - from pyglossary.xdxf.css_js_transform import XdxfTransformer - - self._htmlTr = XdxfTransformer(encoding=self._encoding) - - def open(self, filename: str) -> None: # noqa: PLR0912 - # - - self._filename = filename - self.makeTransformer() - self._glos.setDefaultDefiFormat("h") - - cfile = self._file = cast( - "io.IOBase", - compressionOpen( - self._filename, - mode="rb", - ), - ) - - context = ET.iterparse( # type: ignore - cfile, - events=("end",), - ) - abbr_defs: list[Element] = [] - for _, _elem in context: - elem = cast("Element", _elem) - if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: - break - # every other tag before or
                          is considered info - if elem.tag == "abbr_def": - abbr_defs.append(elem) - continue - # in case of multiple or multiple tags, the last one - # will be stored. - # Very few formats support more than one language pair in their - # metadata, so it's not very useful to have multiple - if elem.tag == "from": - for key, value in elem.attrib.items(): - if key.endswith("}lang"): - self._glos.sourceLangName = value.split("-")[0] - break - continue - if elem.tag == "to": - for key, value in elem.attrib.items(): - if key.endswith("}lang"): - self._glos.targetLangName = value.split("-")[0] - break - continue - if not elem.text: - if elem.tag != "br": - log.warning(f"empty tag <{elem.tag}>") - continue - key = self.infoKeyMap.get(elem.tag, elem.tag) - self._glos.setInfo(key, elem.text) - self._abbr_defs_js = self.generate_abbr_js(abbr_defs) - del context - - if cfile.seekable(): - cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - self._glos.setInfo("input_file_size", str(self._fileSize)) - else: - log.warning("XDXF Reader: file is not seekable") - self._file.close() - self._file = compressionOpen(self._filename, mode="rb") - - def __len__(self) -> int: - return 0 - - def __iter__(self) -> Iterator[EntryType]: - context = ET.iterparse( # type: ignore - self._file, - events=("end",), - tag="ar", - ) - - if not self._has_added_css: - self._has_added_css = True - cssPath = join(rootDir, "pyglossary", "xdxf", "xdxf.css") - with open(cssPath, "rb") as css_file: - yield self._glos.newDataEntry("css/xdxf.css", css_file.read()) - - if self._abbr_defs_js is not None and not self._has_added_js: - self._has_added_js = True - yield self._glos.newDataEntry("js/xdxf.js", self._abbr_defs_js) - - for _, _article in context: - article = cast("Element", _article) - article.tail = None - words = [toStr(w) for w in self.titles(article)] - - defi = self._htmlTr.transform(article) - defiFormat = "h" - if len(words) == 1: - defi = self._re_span_k.sub("", defi) - - defi = f""" - - - - - - {defi} - - -""" - # log.info(f"{defi=}, {words=}") - yield self._glos.newEntry( - words, - defi, - defiFormat=defiFormat, - byteProgress=(self._file.tell(), self._fileSize), - ) - # clean up preceding siblings to save memory - # this can reduce memory usage from 1 GB to ~25 MB - parent = article.getparent() - if parent is None: - continue - while article.getprevious() is not None: - del parent[0] - - def close(self) -> None: - self._file.close() - self._file = nullBinaryIO - - def generate_abbr_js(self, abbr_defs: list[Element]) -> bytes: - abbr_map_js = """const abbr_map = new Map();\n""" - for abbr_def in abbr_defs: - abbr_k_list: list[str] = [] - abbr_v_text = "" - for child in abbr_def.xpath("child::node()"): - if child.tag == "abbr_k": - abbr_k_list.append(self._htmlTr.stringify_children(child)) - if child.tag == "abbr_v": - abbr_v_text = self._htmlTr.stringify_children(child) - # TODO escape apostrophes - for abbr_k in abbr_k_list: - if abbr_k and abbr_v_text: - abbr_map_js += f"abbr_map.set('{abbr_k}', '{abbr_v_text}');\n" - with open(join(rootDir, "pyglossary", "xdxf", "xdxf.js"), "rb") as js_file: - return abbr_map_js.encode(encoding="utf-8") + js_file.read() - - @staticmethod - def tostring( - elem: Element, - ) -> str: - return ( - ET.tostring( - elem, - method="html", - pretty_print=True, - ) - .decode("utf-8") - .strip() - ) - - def titles(self, article: Element) -> list[str]: - """ - :param article: tag - :return: (title (str) | None, alternative titles (set)) - """ - from itertools 
import combinations - - titles: list[str] = [] - for title_element in article.findall("k"): - if title_element.text is None: - # TODO: look for tag? - log.warning(f"empty title element: {self.tostring(title_element)}") - continue - n_opts = len([c for c in title_element if c.tag == "opt"]) - if n_opts: - titles += [ - self._mktitle(title_element, comb) - for j in range(n_opts + 1) - for comb in combinations(list(range(n_opts)), j) - ] - else: - titles.append(self._mktitle(title_element)) - - return titles - - def _mktitle( # noqa: PLR6301 - self, - title_element: Element, - include_opts: Sequence | None = None, - ) -> str: - if include_opts is None: - include_opts = () - title = title_element.text or "" - opt_i = -1 - for c in title_element: - if c.tag == "nu" and c.tail: - if title: - title += c.tail - else: - title = c.tail - if c.tag == "opt" and c.text is not None: - opt_i += 1 - if opt_i in include_opts: - title += c.text - if c.tail: - title += c.tail - return title.strip() diff --git a/pyglossary/plugins/xdxf_css/reader.py b/pyglossary/plugins/xdxf_css/reader.py new file mode 100644 index 000000000..1a1f0a076 --- /dev/null +++ b/pyglossary/plugins/xdxf_css/reader.py @@ -0,0 +1,284 @@ +# -*- coding: utf-8 -*- +# xdxf file format reader and utils to convert xdxf to html. +# +# Copyright © 2023 Saeed Rasooli +# Copyright © 2016 ivan tkachenko me@ratijas.tk +# +# some parts of this file include code from: +# Aard Dictionary Tools . +# Copyright © 2008-2009 Igor Tkach +# +# This program is a free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# You can get a copy of GNU General Public License along this program +# But you can always get it from http://www.gnu.org/licenses/gpl.txt +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +from __future__ import annotations + +import re +import typing +from os.path import join +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import io + from collections.abc import Iterator, Sequence + + from pyglossary.glossary_types import EntryType, GlossaryType + from pyglossary.lxml_types import Element + + +from lxml import etree as ET + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import log, rootDir +from pyglossary.io_utils import nullBinaryIO +from pyglossary.text_utils import toStr + +if TYPE_CHECKING: + + class TransformerType(typing.Protocol): + def transform(self, article: Element) -> str: ... + + +class Reader: + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + _html: bool = True + + infoKeyMap = { + "full_name": "name", + "full_title": "name", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: io.IOBase = nullBinaryIO + self._encoding = "utf-8" + self._htmlTr: TransformerType | None = None + self._re_span_k = re.compile( + '[^<>]*(
                          )?', + ) + self._has_added_css: bool = False + self._has_added_js: bool = False + self._abbr_defs_js: bytes + + def makeTransformer(self) -> None: + from pyglossary.xdxf.css_js_transform import XdxfTransformer + + self._htmlTr = XdxfTransformer(encoding=self._encoding) + + def open(self, filename: str) -> None: # noqa: PLR0912 + # + + self._filename = filename + self.makeTransformer() + self._glos.setDefaultDefiFormat("h") + + cfile = self._file = cast( + "io.IOBase", + compressionOpen( + self._filename, + mode="rb", + ), + ) + + context = ET.iterparse( # type: ignore + cfile, + events=("end",), + ) + abbr_defs: list[Element] = [] + for _, _elem in context: + elem = cast("Element", _elem) + if elem.tag in {"meta_info", "ar", "k", "abr", "dtrn"}: + break + # every other tag before or
                          is considered info + if elem.tag == "abbr_def": + abbr_defs.append(elem) + continue + # in case of multiple or multiple tags, the last one + # will be stored. + # Very few formats support more than one language pair in their + # metadata, so it's not very useful to have multiple + if elem.tag == "from": + for key, value in elem.attrib.items(): + if key.endswith("}lang"): + self._glos.sourceLangName = value.split("-")[0] + break + continue + if elem.tag == "to": + for key, value in elem.attrib.items(): + if key.endswith("}lang"): + self._glos.targetLangName = value.split("-")[0] + break + continue + if not elem.text: + if elem.tag != "br": + log.warning(f"empty tag <{elem.tag}>") + continue + key = self.infoKeyMap.get(elem.tag, elem.tag) + self._glos.setInfo(key, elem.text) + self._abbr_defs_js = self.generate_abbr_js(abbr_defs) + del context + + if cfile.seekable(): + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + self._glos.setInfo("input_file_size", str(self._fileSize)) + else: + log.warning("XDXF Reader: file is not seekable") + self._file.close() + self._file = compressionOpen(self._filename, mode="rb") + + def __len__(self) -> int: + return 0 + + def __iter__(self) -> Iterator[EntryType]: + context = ET.iterparse( # type: ignore + self._file, + events=("end",), + tag="ar", + ) + + if not self._has_added_css: + self._has_added_css = True + cssPath = join(rootDir, "pyglossary", "xdxf", "xdxf.css") + with open(cssPath, "rb") as css_file: + yield self._glos.newDataEntry("css/xdxf.css", css_file.read()) + + if self._abbr_defs_js is not None and not self._has_added_js: + self._has_added_js = True + yield self._glos.newDataEntry("js/xdxf.js", self._abbr_defs_js) + + for _, _article in context: + article = cast("Element", _article) + article.tail = None + words = [toStr(w) for w in self.titles(article)] + + defi = self._htmlTr.transform(article) + defiFormat = "h" + if len(words) == 1: + defi = self._re_span_k.sub("", defi) + + defi = f""" + + + + + + {defi} + + +""" + # log.info(f"{defi=}, {words=}") + yield self._glos.newEntry( + words, + defi, + defiFormat=defiFormat, + byteProgress=(self._file.tell(), self._fileSize), + ) + # clean up preceding siblings to save memory + # this can reduce memory usage from 1 GB to ~25 MB + parent = article.getparent() + if parent is None: + continue + while article.getprevious() is not None: + del parent[0] + + def close(self) -> None: + self._file.close() + self._file = nullBinaryIO + + def generate_abbr_js(self, abbr_defs: list[Element]) -> bytes: + abbr_map_js = """const abbr_map = new Map();\n""" + for abbr_def in abbr_defs: + abbr_k_list: list[str] = [] + abbr_v_text = "" + for child in abbr_def.xpath("child::node()"): + if child.tag == "abbr_k": + abbr_k_list.append(self._htmlTr.stringify_children(child)) + if child.tag == "abbr_v": + abbr_v_text = self._htmlTr.stringify_children(child) + # TODO escape apostrophes + for abbr_k in abbr_k_list: + if abbr_k and abbr_v_text: + abbr_map_js += f"abbr_map.set('{abbr_k}', '{abbr_v_text}');\n" + with open(join(rootDir, "pyglossary", "xdxf", "xdxf.js"), "rb") as js_file: + return abbr_map_js.encode(encoding="utf-8") + js_file.read() + + @staticmethod + def tostring( + elem: Element, + ) -> str: + return ( + ET.tostring( + elem, + method="html", + pretty_print=True, + ) + .decode("utf-8") + .strip() + ) + + def titles(self, article: Element) -> list[str]: + """ + :param article: tag + :return: (title (str) | None, alternative titles (set)) + """ + from itertools 
import combinations + + titles: list[str] = [] + for title_element in article.findall("k"): + if title_element.text is None: + # TODO: look for tag? + log.warning(f"empty title element: {self.tostring(title_element)}") + continue + n_opts = len([c for c in title_element if c.tag == "opt"]) + if n_opts: + titles += [ + self._mktitle(title_element, comb) + for j in range(n_opts + 1) + for comb in combinations(list(range(n_opts)), j) + ] + else: + titles.append(self._mktitle(title_element)) + + return titles + + def _mktitle( # noqa: PLR6301 + self, + title_element: Element, + include_opts: Sequence | None = None, + ) -> str: + if include_opts is None: + include_opts = () + title = title_element.text or "" + opt_i = -1 + for c in title_element: + if c.tag == "nu" and c.tail: + if title: + title += c.tail + else: + title = c.tail + if c.tag == "opt" and c.text is not None: + opt_i += 1 + if opt_i in include_opts: + title += c.text + if c.tail: + title += c.tail + return title.strip() diff --git a/pyglossary/plugins/xdxf_lax/__init__.py b/pyglossary/plugins/xdxf_lax/__init__.py index f6fac25bb..521c1597f 100644 --- a/pyglossary/plugins/xdxf_lax/__init__.py +++ b/pyglossary/plugins/xdxf_lax/__init__.py @@ -1,53 +1,12 @@ # -*- coding: utf-8 -*- -# from __future__ import annotations -"""Lax implementation of xdxf reader.""" -# -# Copyright © 2023 Saeed Rasooli -# Copyright © 2016 ivan tkachenko me@ratijas.tk -# -# some parts of this file include code from: -# Aard Dictionary Tools . -# Copyright © 2008-2009 Igor Tkach -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -import re -import typing -from typing import TYPE_CHECKING, cast - -if TYPE_CHECKING: - import io - from collections.abc import Iterator, Sequence - - from lxml.html import HtmlElement as Element - - from pyglossary.glossary_types import EntryType, GlossaryType - -from pyglossary.compression import ( - compressionOpen, - stdCompressions, -) -from pyglossary.core import log -from pyglossary.io_utils import nullBinaryIO from pyglossary.option import ( BoolOption, Option, ) -from pyglossary.text_utils import toStr -from pyglossary.xdxf.transform import XdxfTransformer -from pyglossary.xdxf.xsl_transform import XslXdxfTransformer + +from .reader import Reader __all__ = [ "Reader", @@ -83,204 +42,3 @@ comment="Use XSL transformation", ), } - - -if TYPE_CHECKING: - - class TransformerType(typing.Protocol): - def transform(self, article: Element) -> str: ... - - -class Reader: - compressions = stdCompressions - depends = { - "lxml": "lxml", - } - - _html: bool = True - _xsl: bool = False - - infoKeyMap = { - "full_name": "name", - "full_title": "name", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._file: io.IOBase = nullBinaryIO - self._encoding = "utf-8" - self._htmlTr: TransformerType | None = None - self._re_span_k = re.compile( - '[^<>]*(
                          )?', - ) - - def readUntil(self, untilByte: bytes) -> tuple[int, bytes]: - file = self._file - buf = b"" - while True: - tmp = file.read(100) - if not tmp: - break - buf += tmp - index = buf.find(untilByte) - if index < 0: - continue - file.seek(file.tell() - len(buf) + index) - return index, buf[:index] - return -1, buf - - def _readOneMetadata(self, tag: str, infoKey: str) -> None: - from lxml.etree import XML - - endTag = f"".encode("ascii") - descStart, _ = self.readUntil(f"<{tag}>".encode("ascii")) - if descStart < 0: - log.warning(f"did not find {tag} open") - return - - descEnd, desc = self.readUntil(endTag) - if descEnd < 0: - log.warning(f"did not find {tag} close") - return - - desc += endTag - elem = XML(desc) - if elem.text: - self._glos.setInfo(infoKey, elem.text) - - def readMetadata(self) -> None: - file = self._file - pos = file.tell() - self._readOneMetadata("full_name", "title") - file.seek(pos) - self._readOneMetadata("description", "description") - - def open(self, filename: str) -> None: - # - self._filename = filename - if self._html: - if self._xsl: - self._htmlTr = XslXdxfTransformer(encoding=self._encoding) - else: - self._htmlTr = XdxfTransformer(encoding=self._encoding) - self._glos.setDefaultDefiFormat("h") - else: - self._glos.setDefaultDefiFormat("x") - - cfile = self._file = compressionOpen(self._filename, mode="rb") - - self.readMetadata() - - cfile.seek(0, 2) - self._fileSize = cfile.tell() - cfile.seek(0) - self._glos.setInfo("input_file_size", str(self._fileSize)) - - def __len__(self) -> int: - return 0 - - def __iter__(self) -> Iterator[EntryType]: - from lxml.html import fromstring, tostring - - while True: - start, _ = self.readUntil(b"") - if end < 0: - break - b_article += b"
                          " - s_article = b_article.decode("utf-8") - try: - article = cast("Element", fromstring(s_article)) - except Exception as e: - log.exception(s_article) - raise e from None - words = [toStr(w) for w in self.titles(article)] - if self._htmlTr: - defi = self._htmlTr.transform(article) - defiFormat = "h" - if len(words) == 1: - defi = self._re_span_k.sub("", defi) - else: - b_defi = cast("bytes", tostring(article, encoding=self._encoding)) - defi = b_defi[4:-5].decode(self._encoding).strip() - defiFormat = "x" - - # log.info(f"{defi=}, {words=}") - yield self._glos.newEntry( - words, - defi, - defiFormat=defiFormat, - byteProgress=(self._file.tell(), self._fileSize), - ) - - def close(self) -> None: - if self._file: - self._file.close() - self._file = nullBinaryIO - - @staticmethod - def tostring( - elem: Element, - ) -> str: - from lxml.html import tostring - - return ( - tostring( - elem, - method="html", - pretty_print=True, - ) - .decode("utf-8") - .strip() - ) - - def titles(self, article: Element) -> list[str]: - """ - :param article: tag - :return: (title (str) | None, alternative titles (set)) - """ - from itertools import combinations - - titles: list[str] = [] - for title_element in article.findall("k"): - if title_element.text is None: - # TODO: look for tag? - log.warning(f"empty title element: {self.tostring(title_element)}") - continue - n_opts = len([c for c in title_element if c.tag == "opt"]) - if n_opts: - titles += [ - self._mktitle(title_element, comb) - for j in range(n_opts + 1) - for comb in combinations(list(range(n_opts)), j) - ] - else: - titles.append(self._mktitle(title_element)) - - return titles - - def _mktitle( # noqa: PLR6301 - self, - title_element: Element, - include_opts: Sequence | None = None, - ) -> str: - if include_opts is None: - include_opts = () - title = title_element.text or "" - opt_i = -1 - for c in title_element: - if c.tag == "nu" and c.tail: - if title: - title += c.tail - else: - title = c.tail - if c.tag == "opt" and c.text is not None: - opt_i += 1 - if opt_i in include_opts: - title += c.text - if c.tail: - title += c.tail - return title.strip() diff --git a/pyglossary/plugins/xdxf_lax/reader.py b/pyglossary/plugins/xdxf_lax/reader.py new file mode 100644 index 000000000..806787246 --- /dev/null +++ b/pyglossary/plugins/xdxf_lax/reader.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +# +# Lax implementation of xdxf reader. +# +# Copyright © 2023 Saeed Rasooli +# Copyright © 2016 ivan tkachenko me@ratijas.tk +# +# some parts of this file include code from: +# Aard Dictionary Tools . +# Copyright © 2008-2009 Igor Tkach +# +# This program is a free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# You can get a copy of GNU General Public License along this program +# But you can always get it from http://www.gnu.org/licenses/gpl.txt +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +from __future__ import annotations + +import re +import typing +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + import io + from collections.abc import Iterator, Sequence + + from lxml.html import HtmlElement as Element + + from pyglossary.glossary_types import EntryType, GlossaryType + +from pyglossary.compression import ( + compressionOpen, + stdCompressions, +) +from pyglossary.core import log +from pyglossary.io_utils import nullBinaryIO +from pyglossary.text_utils import toStr +from pyglossary.xdxf.transform import XdxfTransformer +from pyglossary.xdxf.xsl_transform import XslXdxfTransformer + +if TYPE_CHECKING: + + class TransformerType(typing.Protocol): + def transform(self, article: Element) -> str: ... + + +class Reader: + compressions = stdCompressions + depends = { + "lxml": "lxml", + } + + _html: bool = True + _xsl: bool = False + + infoKeyMap = { + "full_name": "name", + "full_title": "name", + } + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + self._file: io.IOBase = nullBinaryIO + self._encoding = "utf-8" + self._htmlTr: TransformerType | None = None + self._re_span_k = re.compile( + '[^<>]*(
                          )?', + ) + + def readUntil(self, untilByte: bytes) -> tuple[int, bytes]: + file = self._file + buf = b"" + while True: + tmp = file.read(100) + if not tmp: + break + buf += tmp + index = buf.find(untilByte) + if index < 0: + continue + file.seek(file.tell() - len(buf) + index) + return index, buf[:index] + return -1, buf + + def _readOneMetadata(self, tag: str, infoKey: str) -> None: + from lxml.etree import XML + + endTag = f"".encode("ascii") + descStart, _ = self.readUntil(f"<{tag}>".encode("ascii")) + if descStart < 0: + log.warning(f"did not find {tag} open") + return + + descEnd, desc = self.readUntil(endTag) + if descEnd < 0: + log.warning(f"did not find {tag} close") + return + + desc += endTag + elem = XML(desc) + if elem.text: + self._glos.setInfo(infoKey, elem.text) + + def readMetadata(self) -> None: + file = self._file + pos = file.tell() + self._readOneMetadata("full_name", "title") + file.seek(pos) + self._readOneMetadata("description", "description") + + def open(self, filename: str) -> None: + # + self._filename = filename + if self._html: + if self._xsl: + self._htmlTr = XslXdxfTransformer(encoding=self._encoding) + else: + self._htmlTr = XdxfTransformer(encoding=self._encoding) + self._glos.setDefaultDefiFormat("h") + else: + self._glos.setDefaultDefiFormat("x") + + cfile = self._file = compressionOpen(self._filename, mode="rb") + + self.readMetadata() + + cfile.seek(0, 2) + self._fileSize = cfile.tell() + cfile.seek(0) + self._glos.setInfo("input_file_size", str(self._fileSize)) + + def __len__(self) -> int: + return 0 + + def __iter__(self) -> Iterator[EntryType]: + from lxml.html import fromstring, tostring + + while True: + start, _ = self.readUntil(b"") + if end < 0: + break + b_article += b"
                          " + s_article = b_article.decode("utf-8") + try: + article = cast("Element", fromstring(s_article)) + except Exception as e: + log.exception(s_article) + raise e from None + words = [toStr(w) for w in self.titles(article)] + if self._htmlTr: + defi = self._htmlTr.transform(article) + defiFormat = "h" + if len(words) == 1: + defi = self._re_span_k.sub("", defi) + else: + b_defi = cast("bytes", tostring(article, encoding=self._encoding)) + defi = b_defi[4:-5].decode(self._encoding).strip() + defiFormat = "x" + + # log.info(f"{defi=}, {words=}") + yield self._glos.newEntry( + words, + defi, + defiFormat=defiFormat, + byteProgress=(self._file.tell(), self._fileSize), + ) + + def close(self) -> None: + if self._file: + self._file.close() + self._file = nullBinaryIO + + @staticmethod + def tostring( + elem: Element, + ) -> str: + from lxml.html import tostring + + return ( + tostring( + elem, + method="html", + pretty_print=True, + ) + .decode("utf-8") + .strip() + ) + + def titles(self, article: Element) -> list[str]: + """ + :param article: tag + :return: (title (str) | None, alternative titles (set)) + """ + from itertools import combinations + + titles: list[str] = [] + for title_element in article.findall("k"): + if title_element.text is None: + # TODO: look for tag? + log.warning(f"empty title element: {self.tostring(title_element)}") + continue + n_opts = len([c for c in title_element if c.tag == "opt"]) + if n_opts: + titles += [ + self._mktitle(title_element, comb) + for j in range(n_opts + 1) + for comb in combinations(list(range(n_opts)), j) + ] + else: + titles.append(self._mktitle(title_element)) + + return titles + + def _mktitle( # noqa: PLR6301 + self, + title_element: Element, + include_opts: Sequence | None = None, + ) -> str: + if include_opts is None: + include_opts = () + title = title_element.text or "" + opt_i = -1 + for c in title_element: + if c.tag == "nu" and c.tail: + if title: + title += c.tail + else: + title = c.tail + if c.tag == "opt" and c.text is not None: + opt_i += 1 + if opt_i in include_opts: + title += c.text + if c.tail: + title += c.tail + return title.strip() diff --git a/pyglossary/plugins/yomichan/__init__.py b/pyglossary/plugins/yomichan/__init__.py index 428766582..2fa262e3c 100644 --- a/pyglossary/plugins/yomichan/__init__.py +++ b/pyglossary/plugins/yomichan/__init__.py @@ -1,12 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import annotations -import json -import os -import re -from os.path import join -from typing import TYPE_CHECKING, Any - from pyglossary.flags import ALWAYS from pyglossary.option import ( BoolOption, @@ -15,10 +9,7 @@ StrOption, ) -if TYPE_CHECKING: - from collections.abc import Generator, Sequence - - from pyglossary.glossary_types import EntryType, GlossaryType +from .writer import Writer __all__ = [ "Writer", @@ -180,239 +171,3 @@ ), ), } - - -def _isKana(char: str) -> bool: - assert len(char) == 1 - val = ord(char) - return ( - 0x3040 <= val <= 0x309F # Hiragana - or 0x30A0 <= val <= 0x30FF # Katakana (incl. center dot) - or 0xFF65 <= val <= 0xFF9F # Half-width Katakana (incl. 
center dot) - ) - - -def _isKanji(char: str) -> bool: - assert len(char) == 1 - val = ord(char) - return ( - 0x3400 <= val <= 0x4DBF # CJK Unified Ideographs Extension A - or 0x4E00 <= val <= 0x9FFF # CJK Unified Ideographs - or 0xF900 <= val <= 0xFAFF # CJK Compatibility Ideographs - or 0x20000 <= val <= 0x2A6DF # CJK Unified Ideographs Extension B - or 0x2A700 <= val <= 0x2B73F # CJK Unified Ideographs Extension C - or 0x2B740 <= val <= 0x2B81F # CJK Unified Ideographs Extension D - or 0x2F800 <= val <= 0x2FA1F # CJK Compatibility Ideographs Supplement - ) - - -def _uniqueList(lst: Sequence[str]) -> list[str]: - seen: set[str] = set() - result: list[str] = [] - for elem in lst: - if elem not in seen: - seen.add(elem) - result.append(elem) - - return result - - -def _compilePat(pattern: str) -> re.Pattern | None: - if not pattern: - return None - return re.compile(pattern) - - -class Writer: - depends = { - "bs4": "beautifulsoup4", - } - - _term_bank_size = 10_000 - _term_from_headword_only = True - _no_term_from_reading = True - _delete_word_pattern = "" - _ignore_word_with_pattern = "" - _alternates_from_word_pattern = "" - _alternates_from_defi_pattern = "" - _rule_v1_defi_pattern = "" - _rule_v5_defi_pattern = "" - _rule_vs_defi_pattern = "" - _rule_vk_defi_pattern = "" - _rule_adji_defi_pattern = "" - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - # Yomichan technically supports "structured content" that renders to - # HTML, but it doesn't seem widely used. So here we also strip HTML - # formatting for simplicity. - glos.removeHtmlTagsAll() - self.delete_word_pattern = _compilePat(self._delete_word_pattern) - self.ignore_word_with_pattern = _compilePat(self._ignore_word_with_pattern) - self.alternates_from_word_pattern = _compilePat( - self._alternates_from_word_pattern - ) - self.alternates_from_defi_pattern = _compilePat( - self._alternates_from_defi_pattern - ) - self.rules = [ - (_compilePat(self._rule_v1_defi_pattern), "v1"), - (_compilePat(self._rule_v5_defi_pattern), "v5"), - (_compilePat(self._rule_vs_defi_pattern), "vs"), - (_compilePat(self._rule_vk_defi_pattern), "vk"), - (_compilePat(self._rule_adji_defi_pattern), "adj-i"), - ] - - def _getInfo(self, key: str) -> str: - info = self._glos.getInfo(key) - return info.replace("\n", "
                          ") - - def _getAuthor(self) -> str: - return self._glos.author.replace("\n", "
                          ") - - def _getDictionaryIndex(self) -> dict[str, Any]: - # Schema: https://github.com/FooSoft/yomichan/ - # blob/master/ext/data/schemas/dictionary-index-schema.json - return { - "title": self._getInfo("title"), - "revision": "PyGlossary export", - "sequenced": True, - "format": 3, - "author": self._getAuthor(), - "url": self._getInfo("website"), - "description": self._getInfo("description"), - } - - def _getExpressionsAndReadingFromEntry( - self, - entry: EntryType, - ) -> tuple[list[str], str]: - term_expressions = entry.l_word - - alternates_from_word_pattern = self.alternates_from_word_pattern - if alternates_from_word_pattern: - for word in entry.l_word: - term_expressions += alternates_from_word_pattern.findall(word) - - if self.alternates_from_defi_pattern: - term_expressions += self.alternates_from_defi_pattern.findall( - entry.defi, - re.MULTILINE, - ) - - delete_word_pattern = self.delete_word_pattern - if delete_word_pattern: - term_expressions = [ - delete_word_pattern.sub("", expression) - for expression in term_expressions - ] - - ignore_word_with_pattern = self.ignore_word_with_pattern - if ignore_word_with_pattern: - term_expressions = [ - expression - for expression in term_expressions - if not ignore_word_with_pattern.search(expression) - ] - - term_expressions = _uniqueList(term_expressions) - - try: - reading = next( - expression - for expression in entry.l_word + term_expressions - if all(map(_isKana, expression)) - ) - except StopIteration: - reading = "" - - if self._no_term_from_reading and len(term_expressions) > 1: - term_expressions = [ - expression for expression in term_expressions if expression != reading - ] - - if self._term_from_headword_only: - term_expressions = term_expressions[:1] - - return term_expressions, reading - - def _getRuleIdentifiersFromEntry(self, entry: EntryType) -> list[str]: - return [ - rule - for pattern, rule in self.rules - if pattern and pattern.search(entry.defi, re.MULTILINE) - ] - - def _getTermsFromEntry( - self, - entry: EntryType, - sequenceNumber: int, - ) -> list[list[Any]]: - termExpressions, reading = self._getExpressionsAndReadingFromEntry(entry) - ruleIdentifiers = self._getRuleIdentifiersFromEntry(entry) - - # Schema: https://github.com/FooSoft/yomichan/ - # blob/master/ext/data/schemas/dictionary-term-bank-v3-schema.json - return [ - [ - expression, - # reading only added if expression contains kanji - reading if any(map(_isKanji, expression)) else "", - "", # definition tags - " ".join(ruleIdentifiers), - 0, # score - [entry.defi], - sequenceNumber, - "", # term tags - ] - for expression in termExpressions - ] - - def open(self, filename: str) -> None: - self._filename = filename - self._glos.mergeEntriesWithSameHeadwordPlaintext() - - def finish(self) -> None: - self._filename = "" - - def write(self) -> Generator[None, EntryType, None]: - direc = self._filename - - os.makedirs(direc, exist_ok=True) - - with open(join(direc, "index.json"), "w", encoding="utf-8") as f: - json.dump(self._getDictionaryIndex(), f, ensure_ascii=False) - - entryCount = 0 - termBankIndex = 0 - terms: list[list[Any]] = [] - - def flushTerms() -> None: - nonlocal termBankIndex - if not terms: - return - with open( - join(direc, f"term_bank_{termBankIndex + 1}.json"), - mode="w", - encoding="utf-8", - ) as _file: - json.dump(terms, _file, ensure_ascii=False) - terms.clear() - termBankIndex += 1 - - while True: - entry: EntryType - entry = yield - if entry is None: - break - - if entry.isData(): - continue - - 
terms.extend(self._getTermsFromEntry(entry, entryCount)) - entryCount += 1 - if len(terms) >= self._term_bank_size: - flushTerms() - - flushTerms() diff --git a/pyglossary/plugins/yomichan/writer.py b/pyglossary/plugins/yomichan/writer.py new file mode 100644 index 000000000..94b6a2a31 --- /dev/null +++ b/pyglossary/plugins/yomichan/writer.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +from __future__ import annotations + +import json +import os +import re +from os.path import join +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Generator, Sequence + + from pyglossary.glossary_types import EntryType, GlossaryType + + +def _isKana(char: str) -> bool: + assert len(char) == 1 + val = ord(char) + return ( + 0x3040 <= val <= 0x309F # Hiragana + or 0x30A0 <= val <= 0x30FF # Katakana (incl. center dot) + or 0xFF65 <= val <= 0xFF9F # Half-width Katakana (incl. center dot) + ) + + +def _isKanji(char: str) -> bool: + assert len(char) == 1 + val = ord(char) + return ( + 0x3400 <= val <= 0x4DBF # CJK Unified Ideographs Extension A + or 0x4E00 <= val <= 0x9FFF # CJK Unified Ideographs + or 0xF900 <= val <= 0xFAFF # CJK Compatibility Ideographs + or 0x20000 <= val <= 0x2A6DF # CJK Unified Ideographs Extension B + or 0x2A700 <= val <= 0x2B73F # CJK Unified Ideographs Extension C + or 0x2B740 <= val <= 0x2B81F # CJK Unified Ideographs Extension D + or 0x2F800 <= val <= 0x2FA1F # CJK Compatibility Ideographs Supplement + ) + + +def _uniqueList(lst: Sequence[str]) -> list[str]: + seen: set[str] = set() + result: list[str] = [] + for elem in lst: + if elem not in seen: + seen.add(elem) + result.append(elem) + + return result + + +def _compilePat(pattern: str) -> re.Pattern | None: + if not pattern: + return None + return re.compile(pattern) + + +class Writer: + depends = { + "bs4": "beautifulsoup4", + } + + _term_bank_size = 10_000 + _term_from_headword_only = True + _no_term_from_reading = True + _delete_word_pattern = "" + _ignore_word_with_pattern = "" + _alternates_from_word_pattern = "" + _alternates_from_defi_pattern = "" + _rule_v1_defi_pattern = "" + _rule_v5_defi_pattern = "" + _rule_vs_defi_pattern = "" + _rule_vk_defi_pattern = "" + _rule_adji_defi_pattern = "" + + def __init__(self, glos: GlossaryType) -> None: + self._glos = glos + self._filename = "" + # Yomichan technically supports "structured content" that renders to + # HTML, but it doesn't seem widely used. So here we also strip HTML + # formatting for simplicity. + glos.removeHtmlTagsAll() + self.delete_word_pattern = _compilePat(self._delete_word_pattern) + self.ignore_word_with_pattern = _compilePat(self._ignore_word_with_pattern) + self.alternates_from_word_pattern = _compilePat( + self._alternates_from_word_pattern + ) + self.alternates_from_defi_pattern = _compilePat( + self._alternates_from_defi_pattern + ) + self.rules = [ + (_compilePat(self._rule_v1_defi_pattern), "v1"), + (_compilePat(self._rule_v5_defi_pattern), "v5"), + (_compilePat(self._rule_vs_defi_pattern), "vs"), + (_compilePat(self._rule_vk_defi_pattern), "vk"), + (_compilePat(self._rule_adji_defi_pattern), "adj-i"), + ] + + def _getInfo(self, key: str) -> str: + info = self._glos.getInfo(key) + return info.replace("\n", "
                          ") + + def _getAuthor(self) -> str: + return self._glos.author.replace("\n", "
                          ") + + def _getDictionaryIndex(self) -> dict[str, Any]: + # Schema: https://github.com/FooSoft/yomichan/ + # blob/master/ext/data/schemas/dictionary-index-schema.json + return { + "title": self._getInfo("title"), + "revision": "PyGlossary export", + "sequenced": True, + "format": 3, + "author": self._getAuthor(), + "url": self._getInfo("website"), + "description": self._getInfo("description"), + } + + def _getExpressionsAndReadingFromEntry( + self, + entry: EntryType, + ) -> tuple[list[str], str]: + term_expressions = entry.l_word + + alternates_from_word_pattern = self.alternates_from_word_pattern + if alternates_from_word_pattern: + for word in entry.l_word: + term_expressions += alternates_from_word_pattern.findall(word) + + if self.alternates_from_defi_pattern: + term_expressions += self.alternates_from_defi_pattern.findall( + entry.defi, + re.MULTILINE, + ) + + delete_word_pattern = self.delete_word_pattern + if delete_word_pattern: + term_expressions = [ + delete_word_pattern.sub("", expression) + for expression in term_expressions + ] + + ignore_word_with_pattern = self.ignore_word_with_pattern + if ignore_word_with_pattern: + term_expressions = [ + expression + for expression in term_expressions + if not ignore_word_with_pattern.search(expression) + ] + + term_expressions = _uniqueList(term_expressions) + + try: + reading = next( + expression + for expression in entry.l_word + term_expressions + if all(map(_isKana, expression)) + ) + except StopIteration: + reading = "" + + if self._no_term_from_reading and len(term_expressions) > 1: + term_expressions = [ + expression for expression in term_expressions if expression != reading + ] + + if self._term_from_headword_only: + term_expressions = term_expressions[:1] + + return term_expressions, reading + + def _getRuleIdentifiersFromEntry(self, entry: EntryType) -> list[str]: + return [ + rule + for pattern, rule in self.rules + if pattern and pattern.search(entry.defi, re.MULTILINE) + ] + + def _getTermsFromEntry( + self, + entry: EntryType, + sequenceNumber: int, + ) -> list[list[Any]]: + termExpressions, reading = self._getExpressionsAndReadingFromEntry(entry) + ruleIdentifiers = self._getRuleIdentifiersFromEntry(entry) + + # Schema: https://github.com/FooSoft/yomichan/ + # blob/master/ext/data/schemas/dictionary-term-bank-v3-schema.json + return [ + [ + expression, + # reading only added if expression contains kanji + reading if any(map(_isKanji, expression)) else "", + "", # definition tags + " ".join(ruleIdentifiers), + 0, # score + [entry.defi], + sequenceNumber, + "", # term tags + ] + for expression in termExpressions + ] + + def open(self, filename: str) -> None: + self._filename = filename + self._glos.mergeEntriesWithSameHeadwordPlaintext() + + def finish(self) -> None: + self._filename = "" + + def write(self) -> Generator[None, EntryType, None]: + direc = self._filename + + os.makedirs(direc, exist_ok=True) + + with open(join(direc, "index.json"), "w", encoding="utf-8") as f: + json.dump(self._getDictionaryIndex(), f, ensure_ascii=False) + + entryCount = 0 + termBankIndex = 0 + terms: list[list[Any]] = [] + + def flushTerms() -> None: + nonlocal termBankIndex + if not terms: + return + with open( + join(direc, f"term_bank_{termBankIndex + 1}.json"), + mode="w", + encoding="utf-8", + ) as _file: + json.dump(terms, _file, ensure_ascii=False) + terms.clear() + termBankIndex += 1 + + while True: + entry: EntryType + entry = yield + if entry is None: + break + + if entry.isData(): + continue + + 
terms.extend(self._getTermsFromEntry(entry, entryCount)) + entryCount += 1 + if len(terms) >= self._term_bank_size: + flushTerms() + + flushTerms() diff --git a/pyglossary/plugins/zimfile/__init__.py b/pyglossary/plugins/zimfile/__init__.py index f8951c0a2..8fb211e65 100644 --- a/pyglossary/plugins/zimfile/__init__.py +++ b/pyglossary/plugins/zimfile/__init__.py @@ -1,20 +1,9 @@ # -*- coding: utf-8 -*- - from __future__ import annotations -import os -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Iterator - - from libzim.reader import Archive # type: ignore +from pyglossary.option import Option, UnicodeErrorsOption - from pyglossary.glossary_types import EntryType, GlossaryType - from pyglossary.option import Option - -from pyglossary.core import cacheDir, exc_note, log, pip -from pyglossary.option import UnicodeErrorsOption +from .reader import Reader __all__ = [ "Reader", @@ -61,172 +50,3 @@ # I can't find any way to download zim files from https://library.kiwix.org/ # which wiki.openzim.org points at for downloaing zim files - - -class Reader: - _text_unicode_errors = "replace" - _html_unicode_errors = "replace" - depends = { - "libzim": "libzim>=1.0", - } - - resourceMimeTypes = { - "image/png", - "image/jpeg", - "image/gif", - "image/svg+xml", - "image/webp", - "image/x-icon", - "text/css", - "text/javascript", - "application/javascript", - "application/json", - "application/octet-stream", - "application/octet-stream+xapian", - "application/x-chrome-extension", - "application/warc-headers", - "application/font-woff", - } - - def __init__(self, glos: GlossaryType) -> None: - self._glos = glos - self._filename = "" - self._zimfile: Archive | None = None - - def open(self, filename: str) -> None: - try: - from libzim.reader import Archive - except ModuleNotFoundError as e: - exc_note(e, f"Run `{pip} install libzim` to install") - raise - - self._filename = filename - self._zimfile = Archive(filename) - - def close(self) -> None: - self._filename = "" - self._zimfile = None - - def __len__(self) -> int: - if self._zimfile is None: - log.error("len(reader) called before reader.open()") - return 0 - return self._zimfile.entry_count - - def __iter__(self) -> Iterator[EntryType | None]: # noqa: PLR0912 - glos = self._glos - zimfile = self._zimfile - if zimfile is None: - return - emptyContentCount = 0 - invalidMimeTypeCount = 0 - undefinedMimeTypeCount = 0 - entryCount = zimfile.entry_count - - redirectCount = 0 - - windows = os.sep == "\\" - - try: - f_namemax = os.statvfs(cacheDir).f_namemax # type: ignore - except AttributeError: - log.warning("Unsupported operating system (no os.statvfs)") - # Windows: CreateFileA has a limit of 260 characters. - # CreateFileW supports names up to about 32760 characters (64kB). 
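# [editor's illustration, not part of the patch] The block around this point
# (carried over unchanged into the new zimfile/reader.py) probes the
# filesystem's maximum file-name length so over-long resource titles can be
# skipped. A standalone sketch of the same probe with the same fallback value:
import os

def max_name_length(path: str = ".", fallback: int = 200) -> int:
	try:
		return os.statvfs(path).f_namemax  # POSIX only
	except AttributeError:  # e.g. Windows has no os.statvfs
		return fallback

print(max_name_length())  # typically 255 on common filesystems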
- f_namemax = 200 - - fileNameTooLong: list[str] = [] - - text_unicode_errors = self._text_unicode_errors - html_unicode_errors = self._html_unicode_errors - - for entryIndex in range(entryCount): - zEntry = zimfile._get_entry_by_id(entryIndex) - word = zEntry.title - - if zEntry.is_redirect: - redirectCount += 1 - targetWord = zEntry.get_redirect_entry().title - yield glos.newEntry( - word, - f'Redirect: {targetWord}', - defiFormat="h", - ) - continue - - zItem = zEntry.get_item() - b_content = zItem.content.tobytes() - - if not b_content: - emptyContentCount += 1 - yield None - # TODO: test with more zim files - # Looks like: zItem.path == zEntry.path == "-" + word - # print(f"b_content empty, {word=}, {zEntry.path=}, {zItem.path=}") - # if zEntry.path == "-" + word: - # yield None - # else: - # defi = f"Path: {zEntry.path}" - # yield glos.newEntry(word, defi, defiFormat="m") - continue - - try: - mimetype = zItem.mimetype - except RuntimeError: - invalidMimeTypeCount += 1 - mimetype = "" - yield glos.newDataEntry(word, b_content) - - if mimetype == "undefined": - undefinedMimeTypeCount += 1 - continue - - mimetype = mimetype.split(";")[0] - - if mimetype.startswith("text/html"): - # can be "text/html;raw=true" - defi = b_content.decode("utf-8", errors=html_unicode_errors) - defi = defi.replace(' src="../I/', ' src="./') - yield glos.newEntry(word, defi, defiFormat="h") - continue - - if mimetype == "text/plain": - yield glos.newEntry( - word, - b_content.decode("utf-8", errors=text_unicode_errors), - defiFormat="m", - ) - continue - - if mimetype not in self.resourceMimeTypes: - log.warning(f"Unrecognized {mimetype=}") - - if len(word) > f_namemax: - fileNameTooLong.append(word) - continue - - if "|" in word: - log.warning(f"resource title: {word}") - if windows: - continue - - try: - entry = glos.newDataEntry(word, b_content) - except Exception as e: - log.error(f"error creating file: {e}") - continue - yield entry - - log.info(f"ZIM Entry Count: {entryCount}") - - if fileNameTooLong: - log.warning(f"Files with name too long: {len(fileNameTooLong)}") - - if emptyContentCount > 0: - log.info(f"Empty Content Count: {emptyContentCount}") - if invalidMimeTypeCount > 0: - log.info(f"Invalid MIME-Type Count: {invalidMimeTypeCount}") - if undefinedMimeTypeCount > 0: - log.info(f"MIME-Type 'undefined' Count: {invalidMimeTypeCount}") - if redirectCount > 0: - log.info(f"Redirect Count: {redirectCount}") diff --git a/pyglossary/plugins/zimfile/reader.py b/pyglossary/plugins/zimfile/reader.py new file mode 100644 index 000000000..3ed362f03 --- /dev/null +++ b/pyglossary/plugins/zimfile/reader.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + + from libzim.reader import Archive # type: ignore + + from pyglossary.glossary_types import EntryType, GlossaryType + +from pyglossary.core import cacheDir, exc_note, log, pip + + +class Reader: + _text_unicode_errors = "replace" + _html_unicode_errors = "replace" + depends = { + "libzim": "libzim>=1.0", + } + + resourceMimeTypes = { + "image/png", + "image/jpeg", + "image/gif", + "image/svg+xml", + "image/webp", + "image/x-icon", + "text/css", + "text/javascript", + "application/javascript", + "application/json", + "application/octet-stream", + "application/octet-stream+xapian", + "application/x-chrome-extension", + "application/warc-headers", + "application/font-woff", + } + + def __init__(self, glos: GlossaryType) 
-> None: + self._glos = glos + self._filename = "" + self._zimfile: Archive | None = None + + def open(self, filename: str) -> None: + try: + from libzim.reader import Archive + except ModuleNotFoundError as e: + exc_note(e, f"Run `{pip} install libzim` to install") + raise + + self._filename = filename + self._zimfile = Archive(filename) + + def close(self) -> None: + self._filename = "" + self._zimfile = None + + def __len__(self) -> int: + if self._zimfile is None: + log.error("len(reader) called before reader.open()") + return 0 + return self._zimfile.entry_count + + def __iter__(self) -> Iterator[EntryType | None]: # noqa: PLR0912 + glos = self._glos + zimfile = self._zimfile + if zimfile is None: + return + emptyContentCount = 0 + invalidMimeTypeCount = 0 + undefinedMimeTypeCount = 0 + entryCount = zimfile.entry_count + + redirectCount = 0 + + windows = os.sep == "\\" + + try: + f_namemax = os.statvfs(cacheDir).f_namemax # type: ignore + except AttributeError: + log.warning("Unsupported operating system (no os.statvfs)") + # Windows: CreateFileA has a limit of 260 characters. + # CreateFileW supports names up to about 32760 characters (64kB). + f_namemax = 200 + + fileNameTooLong: list[str] = [] + + text_unicode_errors = self._text_unicode_errors + html_unicode_errors = self._html_unicode_errors + + for entryIndex in range(entryCount): + zEntry = zimfile._get_entry_by_id(entryIndex) + word = zEntry.title + + if zEntry.is_redirect: + redirectCount += 1 + targetWord = zEntry.get_redirect_entry().title + yield glos.newEntry( + word, + f'Redirect: {targetWord}', + defiFormat="h", + ) + continue + + zItem = zEntry.get_item() + b_content = zItem.content.tobytes() + + if not b_content: + emptyContentCount += 1 + yield None + # TODO: test with more zim files + # Looks like: zItem.path == zEntry.path == "-" + word + # print(f"b_content empty, {word=}, {zEntry.path=}, {zItem.path=}") + # if zEntry.path == "-" + word: + # yield None + # else: + # defi = f"Path: {zEntry.path}" + # yield glos.newEntry(word, defi, defiFormat="m") + continue + + try: + mimetype = zItem.mimetype + except RuntimeError: + invalidMimeTypeCount += 1 + mimetype = "" + yield glos.newDataEntry(word, b_content) + + if mimetype == "undefined": + undefinedMimeTypeCount += 1 + continue + + mimetype = mimetype.split(";")[0] + + if mimetype.startswith("text/html"): + # can be "text/html;raw=true" + defi = b_content.decode("utf-8", errors=html_unicode_errors) + defi = defi.replace(' src="../I/', ' src="./') + yield glos.newEntry(word, defi, defiFormat="h") + continue + + if mimetype == "text/plain": + yield glos.newEntry( + word, + b_content.decode("utf-8", errors=text_unicode_errors), + defiFormat="m", + ) + continue + + if mimetype not in self.resourceMimeTypes: + log.warning(f"Unrecognized {mimetype=}") + + if len(word) > f_namemax: + fileNameTooLong.append(word) + continue + + if "|" in word: + log.warning(f"resource title: {word}") + if windows: + continue + + try: + entry = glos.newDataEntry(word, b_content) + except Exception as e: + log.error(f"error creating file: {e}") + continue + yield entry + + log.info(f"ZIM Entry Count: {entryCount}") + + if fileNameTooLong: + log.warning(f"Files with name too long: {len(fileNameTooLong)}") + + if emptyContentCount > 0: + log.info(f"Empty Content Count: {emptyContentCount}") + if invalidMimeTypeCount > 0: + log.info(f"Invalid MIME-Type Count: {invalidMimeTypeCount}") + if undefinedMimeTypeCount > 0: + log.info(f"MIME-Type 'undefined' Count: {invalidMimeTypeCount}") + if 
redirectCount > 0:
+		log.info(f"Redirect Count: {redirectCount}")
diff --git a/tests/deprecated/glossary_security_test.py b/tests/deprecated/glossary_security_test.py
index 78f55f060..81fd531d8 100644
--- a/tests/deprecated/glossary_security_test.py
+++ b/tests/deprecated/glossary_security_test.py
@@ -62,5 +62,6 @@ def test_convert_4(self):
 		self.assertIsNone(res)
 		self.assertLogCritical("Unable to detect output format!")
+
 if __name__ == "__main__":
 	unittest.main()
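# [editor's note, not part of the patch] Every plugin split by this patch keeps
# its public interface: each package __init__.py re-exports the class from the
# new module (e.g. "from .reader import Reader"), so existing imports stay
# valid. A hedged sanity check, assuming pyglossary is importable (libzim is
# only imported lazily inside Reader.open, so it is not needed for this):
from pyglossary.plugins.zimfile import Reader as init_Reader
from pyglossary.plugins.zimfile.reader import Reader as module_Reader

assert init_Reader is module_Reader
print("zimfile plugin still exposes Reader from its package root")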