From 957b4da54a8f365ab05d7661cdbc9222405d40a8 Mon Sep 17 00:00:00 2001
From: Adam Turner <9087854+aa-turner@users.noreply.github.com>
Date: Thu, 18 Jan 2024 04:10:52 +0000
Subject: [PATCH] Refactoring

---
 sphinx/builders/__init__.py      | 111 +++++++++++++++++--------------
 sphinx/builders/_epub_base.py    |   2 +-
 sphinx/builders/html/__init__.py |  16 +++--
 sphinx/builders/linkcheck.py     |   2 +-
 sphinx/config.py                 |   1 +
 5 files changed, 74 insertions(+), 58 deletions(-)

diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py
index ec93b89ba63..dd2072943e7 100644
--- a/sphinx/builders/__init__.py
+++ b/sphinx/builders/__init__.py
@@ -79,11 +79,10 @@ class Builder:
     #: post transformation, to be merged to the main builder in
     #: :py:class:`~sphinx.builders.Builder.merge_builder_post_transform`.
     #: Attributes in the list must be pickleable.
-    #: The approach improves performance when
-    #: pickling and sending data over pipes because only a
-    #: subset of the builder attributes are commonly needed for merging
-    #: to the main process builder instance.
-    post_transform_merge_attr: list[str] = []
+    #: The approach improves performance when pickling and sending data
+    #: over pipes because only a subset of the builder attributes
+    #: are commonly needed for merging to the main process builder instance.
+    post_transform_merge_attr: tuple[str, ...] = ()
 
     def __init__(self, app: Sphinx, env: BuildEnvironment) -> None:
         self.srcdir = app.srcdir
@@ -135,19 +134,21 @@ def init(self) -> None:
         pass
 
     def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None:
-        """Give builders the option to merge any post-transform information
-        coming from a parallel sub-process back to the main process builder.
-        This can be useful for extensions that consume that information
-        in the build-finish phase.
+        """Merge post-transform data into the master builder.
+
+        This method allows builders to merge any post-transform information
+        coming from parallel subprocesses back into the builder in
+        the main process. This can be useful for extensions that consume
+        that information in the build-finish phase.
         The function is called once for each finished subprocess.
-        Builders that implement this function must also define
-        the class attribute
-        :py:attr:`~sphinx.builders.Builder.post_transform_merge_attr` as it defines
-        which builder attributes shall be returned to the main process for merging.
+        Builders that implement this function must also define the class
+        attribute :py:attr:`~sphinx.builders.Builder.post_transform_merge_attr`
+        as it defines which builder attributes shall be returned to
+        the main process for merging.
 
         The default implementation does nothing.
 
-        :param new_attrs: the attributes from the parallel subprocess to be
+        :param new_attrs: The attributes from the parallel subprocess to be
                           updated in the main builder
         """
         pass
@@ -591,7 +592,8 @@ def write(
 
         if self.parallel_ok:
             # number of subprocesses is parallel-1 because the main process
-            # is busy loading and post-transforming doctrees and doing write_doc_serialized();
+            # is busy loading and post-transforming doctrees and doing
+            # write_doc_serialized();
             # in case the global configuration enable_parallel_post_transform
             # is active the main process does nothing
             self._write_parallel(sorted(docnames),
@@ -610,6 +612,34 @@ def _write_serial(self, docnames: Sequence[str]) -> None:
                 self.write_doc(docname, doctree)
 
     def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None:
+        def write_process_serial_post_transform(
+            docs: list[tuple[str, nodes.document]],
+        ) -> None:
+            self.app.phase = BuildPhase.WRITING
+            # The doctree has been post-transformed (incl. write_doc_serialized)
+            # in the main process, only write_doc() is needed here
+            for docname, doctree in docs:
+                self.write_doc(docname, doctree)
+
+        def write_process_parallel_post_transform(docs: list[str]) -> bytes:
+            assert self.env.config.enable_parallel_post_transform
+            self.app.phase = BuildPhase.WRITING
+            # run post-transform, doctree-resolved and write_doc_serialized in parallel
+            for docname in docs:
+                doctree = self.env.get_and_resolve_doctree(docname, self)
+                # write_doc_serialized is assumed to be safe for all Sphinx
+                # internal builders. Some builders merge information from post-transform
+                # and write_doc_serialized back to the main process using
+                # Builder.post_transform_merge_attr and
+                # Builder.merge_builder_post_transform
+                self.write_doc_serialized(docname, doctree)
+                self.write_doc(docname, doctree)
+            merge_attr = {
+                attr: getattr(self, attr, None)
+                for attr in self.post_transform_merge_attr
+            }
+            return pickle.dumps(merge_attr, pickle.HIGHEST_PROTOCOL)
+
         # warm up caches/compile templates using the first document
         firstname, docnames = docnames[0], docnames[1:]
         self.app.phase = BuildPhase.RESOLVING
@@ -618,30 +648,6 @@ def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None:
         self.write_doc_serialized(firstname, doctree)
         self.write_doc(firstname, doctree)
 
-        def write_process(docs: list[tuple[str, nodes.document]]) -> bytes | None:
-            self.app.phase = BuildPhase.WRITING
-            if self.env.config.enable_parallel_post_transform:
-                # run post-transform, doctree-resolved and write_doc_serialized in parallel
-                for docname, _ in docs:
-                    doctree = self.env.get_and_resolve_doctree(docname, self)
-                    # write_doc_serialized is assumed to be safe for all Sphinx
-                    # internal builders. Some builders merge information from post-transform
-                    # and write_doc_serialized back to the main process using
-                    # Builder.post_transform_merge_attr and
-                    # Builder.merge_builder_post_transform
-                    self.write_doc_serialized(docname, doctree)
-                    self.write_doc(docname, doctree)
-                merge_attr = {
-                    attr: getattr(self, attr, None)
-                    for attr in self.post_transform_merge_attr
-                }
-                return pickle.dumps(merge_attr, pickle.HIGHEST_PROTOCOL)
-            for docname, doctree in docs:
-                # doctree has been post-transformed (incl. write_doc_serialized)
-                # in the main process, only write_doc is needed here
-                self.write_doc(docname, doctree)
-            return None
-
         tasks = ParallelTasks(nproc)
         chunks = make_chunks(docnames, nproc)
 
@@ -650,23 +656,30 @@ def write_process(docs: list[tuple[str, nodes.document]]) -> bytes | None:
         progress = status_iterator(chunks, __('writing output... '), "darkgreen",
                                    len(chunks), self.app.verbosity)
 
-        def merge_builder(args: list[tuple[str, NoneType]], new_attrs_pickle: bytes) -> None:
-            if self.env.config.enable_parallel_post_transform:
-                new_attrs: dict[str, Any] = pickle.loads(new_attrs_pickle)
-                self.merge_builder_post_transform(new_attrs)
+        def on_chunk_done(args: list[tuple[str, NoneType]], result: NoneType) -> None:
+            next(progress)
+
+        def merge_builder(
+            args: list[tuple[str, NoneType]], pickled_new_attrs: bytes, /,
+        ) -> None:
+            assert self.env.config.enable_parallel_post_transform
+            new_attrs: dict[str, Any] = pickle.loads(pickled_new_attrs)
+            self.merge_builder_post_transform(new_attrs)
             next(progress)
 
         self.app.phase = BuildPhase.RESOLVING
         for chunk in chunks:
-            arg: list[tuple[str, nodes.document | None]] = []
-            for docname in chunk:
-                if not self.env.config.enable_parallel_post_transform:
+            if self.env.config.enable_parallel_post_transform:
+                tasks.add_task(write_process_parallel_post_transform,
+                               chunk, merge_builder)
+            else:
+                arg = []
+                for docname in chunk:
                     doctree = self.env.get_and_resolve_doctree(docname, self)
                     self.write_doc_serialized(docname, doctree)
                     arg.append((docname, doctree))
-                else:
-                    arg.append((docname, None))
-            tasks.add_task(write_process, arg, merge_builder)
+                tasks.add_task(write_process_serial_post_transform,
+                               arg, on_chunk_done)
 
         # make sure all threads have finished
         tasks.join()
diff --git a/sphinx/builders/_epub_base.py b/sphinx/builders/_epub_base.py
index 56ffcdf1c62..8d1599e6502 100644
--- a/sphinx/builders/_epub_base.py
+++ b/sphinx/builders/_epub_base.py
@@ -155,7 +155,7 @@ class EpubBuilder(StandaloneHTMLBuilder):
     refuri_re = REFURI_RE
     template_dir = ""
     doctype = ""
-    post_transform_merge_attr = ['images']
+    post_transform_merge_attr = ('images',)
 
     def init(self) -> None:
         super().init()
diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py
index 3930073b6f2..93df8fbcc0f 100644
--- a/sphinx/builders/html/__init__.py
+++ b/sphinx/builders/html/__init__.py
@@ -188,7 +188,7 @@ class StandaloneHTMLBuilder(Builder):
 
     imgpath: str = ''
     domain_indices: list[DOMAIN_INDEX_TYPE] = []
-    post_transform_merge_attr = ['images', 'indexer']
+    post_transform_merge_attr: tuple[str, ...] = ('images', 'indexer')
 
     def __init__(self, app: Sphinx, env: BuildEnvironment) -> None:
         super().__init__(app, env)
@@ -255,12 +255,14 @@ def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None:
             self.indexer = IndexBuilder(self.env, lang,
                                         self.config.html_search_options,
                                         self.config.html_search_scorer)
-        self.indexer._all_titles.update(new_attrs['indexer']._all_titles)
-        self.indexer._filenames.update(new_attrs['indexer']._filenames)
-        self.indexer._index_entries.update(new_attrs['indexer']._index_entries)
-        self.indexer._mapping.update(new_attrs['indexer']._mapping)
-        self.indexer._title_mapping.update(new_attrs['indexer']._title_mapping)
-        self.indexer._titles.update(new_attrs['indexer']._titles)
+        indexer_data = new_attrs['indexer']
+        self.indexer._all_titles |= indexer_data._all_titles
+        self.indexer._filenames |= indexer_data._filenames
+        self.indexer._index_entries |= indexer_data._index_entries
+        self.indexer._mapping |= indexer_data._mapping
+        self.indexer._title_mapping |= indexer_data._title_mapping
+        self.indexer._titles |= indexer_data._titles
+
         # handle images
         for filepath, filename in new_attrs['images'].items():
             if filepath not in self.images:
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 53420f5a17e..59abb6b9157 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -64,7 +64,7 @@ class CheckExternalLinksBuilder(DummyBuilder):
     epilog = __('Look for any errors in the above output or in '
                 '%(outdir)s/output.txt')
 
-    post_transform_merge_attr = ['hyperlinks']
+    post_transform_merge_attr = ('hyperlinks',)
 
     def init(self) -> None:
         self.broken_hyperlinks = 0
diff --git a/sphinx/config.py b/sphinx/config.py
index a9ac0751909..89316f0ed0b 100644
--- a/sphinx/config.py
+++ b/sphinx/config.py
@@ -202,6 +202,7 @@ class Config:
         'gettext_allow_fuzzy_translations': _Opt(False, 'gettext', ()),
         'translation_progress_classes': _Opt(
             False, 'env', ENUM(True, False, 'translated', 'untranslated')),
+
         'master_doc': _Opt('index', 'env', ()),
         'root_doc': _Opt(lambda config: config.master_doc, 'env', ()),
         # ``source_suffix`` type is actually ``dict[str, str | None]``: