convert coalesce_streams function to CoalesceStreamsPreprocessor

`coalesce_streams` was the last remaining "decorated function Preprocessor", and I couldn't find an example of how to use it. Here it is converted to be a Preprocessor subclass, like the others. A top-level --coalesce-streams flag is also added.
jupyter · Dec 26, 2023 · 8fb5d4c · 8fb5d4c
1 parent 1562531
commit 8fb5d4c
Show file tree

Hide file tree

Showing 6 changed files with 90 additions and 111 deletions.
diff --git a/docs/source/api/preprocessors.rst b/docs/source/api/preprocessors.rst
@@ -43,11 +43,13 @@ Metadata and header control
 
 .. autoclass:: CSSHTMLHeaderPreprocessor
 
-Removing cells, inputs, and outputs
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Removing/Manipulating cells, inputs, and outputs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autoclass:: ClearOutputPreprocessor
 
+.. autoclass:: CoalesceStreamsPreprocessor
+
 .. autoclass:: RegexRemovePreprocessor
 
 .. autoclass:: TagRemovePreprocessor
@@ -59,5 +61,3 @@ Executing Notebooks
     :members:
 
 .. autoclass:: CellExecutionError
-
-.. autofunction:: coalesce_streams
diff --git a/nbconvert/exporters/exporter.py b/nbconvert/exporters/exporter.py
@@ -89,8 +89,8 @@ class Exporter(LoggingConfigurable):
             "nbconvert.preprocessors.TagRemovePreprocessor",
             "nbconvert.preprocessors.RegexRemovePreprocessor",
             "nbconvert.preprocessors.ClearOutputPreprocessor",
+            "nbconvert.preprocessors.CoalesceStreamsPreprocessor",
             "nbconvert.preprocessors.ExecutePreprocessor",
-            "nbconvert.preprocessors.coalesce_streams",
             "nbconvert.preprocessors.SVG2PDFPreprocessor",
             "nbconvert.preprocessors.LatexPreprocessor",
             "nbconvert.preprocessors.HighlightMagicsPreprocessor",

diff --git a/nbconvert/nbconvertapp.py b/nbconvert/nbconvertapp.py
@@ -118,6 +118,14 @@ def validate(self, obj, value):
             """Clear output of current file and save in place,
         overwriting the existing notebook. """,
         ),
+        "coalesce-streams": (
+            {
+                "NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
+                "FilesWriter": {"build_directory": ""},
+                "CoalesceStreamsPreprocessor": {"enabled": True},
+            },
+            """Coalesce consecutive stdout and stderr outputs into one stream (within each cell).""",
+        ),
         "no-prompt": (
             {
                 "TemplateExporter": {

diff --git a/nbconvert/preprocessors/__init__.py b/nbconvert/preprocessors/__init__.py
@@ -5,9 +5,7 @@
 from .base import Preprocessor
 from .clearmetadata import ClearMetadataPreprocessor
 from .clearoutput import ClearOutputPreprocessor
-
-# decorated function Preprocessors
-from .coalescestreams import coalesce_streams
+from .coalescestreams import CoalesceStreamsPreprocessor
 from .convertfigures import ConvertFiguresPreprocessor
 from .csshtmlheader import CSSHTMLHeaderPreprocessor
 from .execute import ExecutePreprocessor
@@ -24,6 +22,7 @@
     "Preprocessor",
     "ClearMetadataPreprocessor",
     "ClearOutputPreprocessor",
+    "CoalesceStreamsPreprocessor",
     "ConvertFiguresPreprocessor",
     "CSSHTMLHeaderPreprocessor",
     "ExecutePreprocessor",

diff --git a/nbconvert/preprocessors/coalescestreams.py b/nbconvert/preprocessors/coalescestreams.py
@@ -1,81 +1,44 @@
 """Preprocessor for merging consecutive stream outputs for easier handling."""
+import re
 
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
+from nbconvert.preprocessors import Preprocessor
 
-import functools
-import re
-
-from traitlets.log import get_logger
-
-
-def cell_preprocessor(function):
-    """
-    Wrap a function to be executed on all cells of a notebook
-
-    The wrapped function should have these parameters:
-
-    cell : NotebookNode cell
-        Notebook cell being processed
-    resources : dictionary
-        Additional resources used in the conversion process.  Allows
-        preprocessors to pass variables into the Jinja engine.
-    index : int
-        Index of the cell being processed
-    """
-
-    @functools.wraps(function)
-    def wrappedfunc(nb, resources):
-        get_logger().debug("Applying preprocessor: %s", function.__name__)
-        for index, cell in enumerate(nb.cells):
-            nb.cells[index], resources = function(cell, resources, index)
-        return nb, resources
-
-    return wrappedfunc
-
+CR_PAT = re.compile(r".*\r(?=[^\n])")
 
-cr_pat = re.compile(r".*\r(?=[^\n])")
 
-
-@cell_preprocessor
-def coalesce_streams(cell, resources, index):
+class CoalesceStreamsPreprocessor(Preprocessor):
     """
     Merge consecutive sequences of stream output into single stream
     to prevent extra newlines inserted at flush calls
-
-    Parameters
-    ----------
-    cell : NotebookNode cell
-        Notebook cell being processed
-    resources : dictionary
-        Additional resources used in the conversion process.  Allows
-        transformers to pass variables into the Jinja engine.
-    index : int
-        Index of the cell being processed
     """
 
-    outputs = cell.get("outputs", [])
-    if not outputs:
+    def preprocess_cell(self, cell, resources, cell_index):
+        """
+        Apply a transformation on each cell. See base.py for details.
+        """
+        outputs = cell.get("outputs", [])
+        if not outputs:
+            return cell, resources
+
+        last = outputs[0]
+        new_outputs = [last]
+        for output in outputs[1:]:
+            if (
+                output.output_type == "stream"
+                and last.output_type == "stream"
+                and last.name == output.name
+            ):
+                last.text += output.text
+            else:
+                new_outputs.append(output)
+                last = output
+
+        # process \r characters
+        for output in new_outputs:
+            if output.output_type == "stream" and "\r" in output.text:
+                output.text = CR_PAT.sub("", output.text)
+
+        cell.outputs = new_outputs
         return cell, resources
-
-    last = outputs[0]
-    new_outputs = [last]
-    for output in outputs[1:]:
-        if (
-            output.output_type == "stream"
-            and last.output_type == "stream"
-            and last.name == output.name
-        ):
-            last.text += output.text
-
-        else:
-            new_outputs.append(output)
-            last = output
-
-    # process \r characters
-    for output in new_outputs:
-        if output.output_type == "stream" and "\r" in output.text:
-            output.text = cr_pat.sub("", output.text)
-
-    cell.outputs = new_outputs
-    return cell, resources
diff --git a/tests/preprocessors/test_coalescestreams.py b/tests/preprocessors/test_coalescestreams.py
@@ -5,59 +5,68 @@
 
 from nbformat import v4 as nbformat
 
-from nbconvert.preprocessors.coalescestreams import coalesce_streams
+from nbconvert.preprocessors.coalescestreams import CoalesceStreamsPreprocessor
 
 from .base import PreprocessorTestsBase
 
 
 class TestCoalesceStreams(PreprocessorTestsBase):
     """Contains test functions for coalescestreams.py"""
 
+    def build_preprocessor(self):
+        """Make an instance of a preprocessor"""
+        preprocessor = CoalesceStreamsPreprocessor()
+        preprocessor.enabled = True
+        return preprocessor
+
+    def test_constructor(self):
+        """Can a CoalesceStreamsPreprocessor be constructed?"""
+        self.build_preprocessor()
+
+    def process_outputs(self, outputs):
+        """Process outputs"""
+        cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]
+        nb = nbformat.new_notebook(cells=cells)
+        res = self.build_resources()
+        preprocessor = self.build_preprocessor()
+        nb, res = preprocessor(nb, res)
+        return nb.cells[0].outputs
+
     def test_coalesce_streams(self):
-        """coalesce_streams preprocessor output test"""
+        """Test the output of a CoalesceStreamsPreprocessor"""
         nb = self.build_notebook()
-        res = self.build_resources()
-        nb, res = coalesce_streams(nb, res)
-        outputs = nb.cells[0].outputs
+        outputs = self.process_outputs(nb.cells[0].outputs)
         self.assertEqual(outputs[0].text, "a")
         self.assertEqual(outputs[1].output_type, "display_data")
         self.assertEqual(outputs[2].text, "cd")
         self.assertEqual(outputs[3].text, "ef")
 
     def test_coalesce_sequenced_streams(self):
         """Can the coalesce streams preprocessor merge a sequence of streams?"""
-        outputs = [
-            nbformat.new_output(output_type="stream", name="stdout", text="0"),
-            nbformat.new_output(output_type="stream", name="stdout", text="1"),
-            nbformat.new_output(output_type="stream", name="stdout", text="2"),
-            nbformat.new_output(output_type="stream", name="stdout", text="3"),
-            nbformat.new_output(output_type="stream", name="stdout", text="4"),
-            nbformat.new_output(output_type="stream", name="stdout", text="5"),
-            nbformat.new_output(output_type="stream", name="stdout", text="6"),
-            nbformat.new_output(output_type="stream", name="stdout", text="7"),
-        ]
-        cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]
-
-        nb = nbformat.new_notebook(cells=cells)
-        res = self.build_resources()
-        nb, res = coalesce_streams(nb, res)
-        outputs = nb.cells[0].outputs
+        outputs = self.process_outputs(
+            [
+                nbformat.new_output(output_type="stream", name="stdout", text="0"),
+                nbformat.new_output(output_type="stream", name="stdout", text="1"),
+                nbformat.new_output(output_type="stream", name="stdout", text="2"),
+                nbformat.new_output(output_type="stream", name="stdout", text="3"),
+                nbformat.new_output(output_type="stream", name="stdout", text="4"),
+                nbformat.new_output(output_type="stream", name="stdout", text="5"),
+                nbformat.new_output(output_type="stream", name="stdout", text="6"),
+                nbformat.new_output(output_type="stream", name="stdout", text="7"),
+            ]
+        )
         self.assertEqual(outputs[0].text, "01234567")
 
     def test_coalesce_replace_streams(self):
         """Are \\r characters handled?"""
-        outputs = [
-            nbformat.new_output(output_type="stream", name="stdout", text="z"),
-            nbformat.new_output(output_type="stream", name="stdout", text="\ra"),
-            nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"),
-            nbformat.new_output(output_type="stream", name="stdout", text="\nz"),
-            nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"),
-            nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"),
-        ]
-        cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]
-
-        nb = nbformat.new_notebook(cells=cells)
-        res = self.build_resources()
-        nb, res = coalesce_streams(nb, res)
-        outputs = nb.cells[0].outputs
+        outputs = self.process_outputs(
+            [
+                nbformat.new_output(output_type="stream", name="stdout", text="z"),
+                nbformat.new_output(output_type="stream", name="stdout", text="\ra"),
+                nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"),
+                nbformat.new_output(output_type="stream", name="stdout", text="\nz"),
+                nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"),
+                nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"),
+            ]
+        )
         self.assertEqual(outputs[0].text, "a\nb\nc\nd")