Skip to content

Commit

Permalink
convert coalesce_streams function to CoalesceStreamsPreprocessor
Browse files Browse the repository at this point in the history
`coalesce_streams` was the last remaining "decorated function Preprocessor", and I couldn't find an example of how to use it. Here it is converted to be a Preprocessor subclass, like the others. A top-level --coalesce-streams flag is also added.
  • Loading branch information
ryan-williams committed Dec 26, 2023
1 parent 1562531 commit 8fb5d4c
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 111 deletions.
8 changes: 4 additions & 4 deletions docs/source/api/preprocessors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ Metadata and header control

.. autoclass:: CSSHTMLHeaderPreprocessor

Removing cells, inputs, and outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Removing/Manipulating cells, inputs, and outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: ClearOutputPreprocessor

.. autoclass:: CoalesceStreamsPreprocessor

.. autoclass:: RegexRemovePreprocessor

.. autoclass:: TagRemovePreprocessor
Expand All @@ -59,5 +61,3 @@ Executing Notebooks
:members:

.. autoclass:: CellExecutionError

.. autofunction:: coalesce_streams
2 changes: 1 addition & 1 deletion nbconvert/exporters/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ class Exporter(LoggingConfigurable):
"nbconvert.preprocessors.TagRemovePreprocessor",
"nbconvert.preprocessors.RegexRemovePreprocessor",
"nbconvert.preprocessors.ClearOutputPreprocessor",
"nbconvert.preprocessors.CoalesceStreamsPreprocessor",
"nbconvert.preprocessors.ExecutePreprocessor",
"nbconvert.preprocessors.coalesce_streams",
"nbconvert.preprocessors.SVG2PDFPreprocessor",
"nbconvert.preprocessors.LatexPreprocessor",
"nbconvert.preprocessors.HighlightMagicsPreprocessor",
Expand Down
8 changes: 8 additions & 0 deletions nbconvert/nbconvertapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ def validate(self, obj, value):
"""Clear output of current file and save in place,
overwriting the existing notebook. """,
),
"coalesce-streams": (
{
"NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
"FilesWriter": {"build_directory": ""},
"CoalesceStreamsPreprocessor": {"enabled": True},
},
"""Coalesce consecutive stdout and stderr outputs into one stream (within each cell).""",
),
"no-prompt": (
{
"TemplateExporter": {
Expand Down
5 changes: 2 additions & 3 deletions nbconvert/preprocessors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
from .base import Preprocessor
from .clearmetadata import ClearMetadataPreprocessor
from .clearoutput import ClearOutputPreprocessor

# decorated function Preprocessors
from .coalescestreams import coalesce_streams
from .coalescestreams import CoalesceStreamsPreprocessor
from .convertfigures import ConvertFiguresPreprocessor
from .csshtmlheader import CSSHTMLHeaderPreprocessor
from .execute import ExecutePreprocessor
Expand All @@ -24,6 +22,7 @@
"Preprocessor",
"ClearMetadataPreprocessor",
"ClearOutputPreprocessor",
"CoalesceStreamsPreprocessor",
"ConvertFiguresPreprocessor",
"CSSHTMLHeaderPreprocessor",
"ExecutePreprocessor",
Expand Down
99 changes: 31 additions & 68 deletions nbconvert/preprocessors/coalescestreams.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,44 @@
"""Preprocessor for merging consecutive stream outputs for easier handling."""
import re

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from nbconvert.preprocessors import Preprocessor

import functools
import re

from traitlets.log import get_logger


def cell_preprocessor(function):
    """
    Lift a per-cell function into a whole-notebook preprocessor.

    The returned callable logs the name of ``function`` at debug level,
    then calls it once per entry of ``nb.cells`` in order. Each call's
    returned cell replaces the cell at that position, and the returned
    resources are passed on to the next call.

    Parameters
    ----------
    function : callable
        Called as ``function(cell, resources, index)`` and expected to
        return a ``(cell, resources)`` pair, where

        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed

    Returns
    -------
    callable
        ``f(nb, resources)`` returning ``(nb, resources)``; ``nb`` is the
        same object that was passed in, with its cells replaced in place.
    """

    @functools.wraps(function)
    def _apply_to_all_cells(nb, resources):
        get_logger().debug("Applying preprocessor: %s", function.__name__)
        for position, current in enumerate(nb.cells):
            replacement, resources = function(current, resources, position)
            nb.cells[position] = replacement
        return nb, resources

    return _apply_to_all_cells

# Matches a run of non-newline characters ending in a carriage return that is
# immediately followed by some character other than "\n". Because ``.*`` is
# greedy and ``.`` also matches "\r", one match extends through the last such
# carriage return on a line; substituting "" keeps only the text after it.
CR_PAT = re.compile(r".*\r(?=[^\n])")

cr_pat = re.compile(r".*\r(?=[^\n])")


@cell_preprocessor
def coalesce_streams(cell, resources, index):
class CoalesceStreamsPreprocessor(Preprocessor):
"""
Merge consecutive sequences of stream output into single stream
to prevent extra newlines inserted at flush calls
Parameters
----------
cell : NotebookNode cell
Notebook cell being processed
resources : dictionary
Additional resources used in the conversion process. Allows
transformers to pass variables into the Jinja engine.
index : int
Index of the cell being processed
"""

outputs = cell.get("outputs", [])
if not outputs:
def preprocess_cell(self, cell, resources, cell_index):
    """
    Coalesce adjacent stream outputs of a single cell.

    Consecutive outputs whose ``output_type`` is ``"stream"`` and whose
    ``name`` is equal are merged by appending each later output's text to
    the first output of the run. After merging, every stream output whose
    text contains a carriage return is rewritten with ``CR_PAT.sub("", ...)``.

    Parameters
    ----------
    cell : NotebookNode cell
        Notebook cell being processed. Its ``outputs`` attribute is
        replaced, and the first output of each merged run is modified
        in place. A cell with no (or empty) ``outputs`` is returned as is.
    resources : dictionary
        Additional resources used in the conversion process; returned
        unchanged.
    cell_index : int
        Index of the cell being processed (not used here).

    Returns
    -------
    tuple
        ``(cell, resources)``
    """
    existing = cell.get("outputs", [])
    if not existing:
        return cell, resources

    merged = [existing[0]]
    for candidate in existing[1:]:
        # The tail of ``merged`` is always the output a continuation joins.
        tail = merged[-1]
        continues_stream = (
            candidate.output_type == "stream"
            and tail.output_type == "stream"
            and tail.name == candidate.name
        )
        if not continues_stream:
            merged.append(candidate)
            continue
        tail.text += candidate.text

    # process \r characters
    for entry in merged:
        if entry.output_type != "stream" or "\r" not in entry.text:
            continue
        entry.text = CR_PAT.sub("", entry.text)

    cell.outputs = merged
    return cell, resources

last = outputs[0]
new_outputs = [last]
for output in outputs[1:]:
if (
output.output_type == "stream"
and last.output_type == "stream"
and last.name == output.name
):
last.text += output.text

else:
new_outputs.append(output)
last = output

# process \r characters
for output in new_outputs:
if output.output_type == "stream" and "\r" in output.text:
output.text = cr_pat.sub("", output.text)

cell.outputs = new_outputs
return cell, resources
79 changes: 44 additions & 35 deletions tests/preprocessors/test_coalescestreams.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,68 @@

from nbformat import v4 as nbformat

from nbconvert.preprocessors.coalescestreams import coalesce_streams
from nbconvert.preprocessors.coalescestreams import CoalesceStreamsPreprocessor

from .base import PreprocessorTestsBase


class TestCoalesceStreams(PreprocessorTestsBase):
"""Contains test functions for coalescestreams.py"""

def build_preprocessor(self):
    """Return a new CoalesceStreamsPreprocessor with ``enabled`` set to True."""
    coalescer = CoalesceStreamsPreprocessor()
    coalescer.enabled = True
    return coalescer

def test_constructor(self):
    """Building the preprocessor via ``build_preprocessor`` must not raise."""
    self.build_preprocessor()

def process_outputs(self, outputs):
    """
    Run the preprocessor over a one-cell notebook and return its outputs.

    Parameters
    ----------
    outputs : list
        Outputs to attach to the single code cell that is created.

    Returns
    -------
    list
        The ``outputs`` of the first cell after preprocessing.
    """
    code_cell = nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)
    notebook = nbformat.new_notebook(cells=[code_cell])
    resources = self.build_resources()
    coalescer = self.build_preprocessor()
    processed, _ = coalescer(notebook, resources)
    return processed.cells[0].outputs

def test_coalesce_streams(self):
"""coalesce_streams preprocessor output test"""
"""Test the output of a CoalesceStreamsPreprocessor"""
nb = self.build_notebook()
res = self.build_resources()
nb, res = coalesce_streams(nb, res)
outputs = nb.cells[0].outputs
outputs = self.process_outputs(nb.cells[0].outputs)
self.assertEqual(outputs[0].text, "a")
self.assertEqual(outputs[1].output_type, "display_data")
self.assertEqual(outputs[2].text, "cd")
self.assertEqual(outputs[3].text, "ef")

def test_coalesce_sequenced_streams(self):
"""Can the coalesce streams preprocessor merge a sequence of streams?"""
outputs = [
nbformat.new_output(output_type="stream", name="stdout", text="0"),
nbformat.new_output(output_type="stream", name="stdout", text="1"),
nbformat.new_output(output_type="stream", name="stdout", text="2"),
nbformat.new_output(output_type="stream", name="stdout", text="3"),
nbformat.new_output(output_type="stream", name="stdout", text="4"),
nbformat.new_output(output_type="stream", name="stdout", text="5"),
nbformat.new_output(output_type="stream", name="stdout", text="6"),
nbformat.new_output(output_type="stream", name="stdout", text="7"),
]
cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]

nb = nbformat.new_notebook(cells=cells)
res = self.build_resources()
nb, res = coalesce_streams(nb, res)
outputs = nb.cells[0].outputs
outputs = self.process_outputs(
[
nbformat.new_output(output_type="stream", name="stdout", text="0"),
nbformat.new_output(output_type="stream", name="stdout", text="1"),
nbformat.new_output(output_type="stream", name="stdout", text="2"),
nbformat.new_output(output_type="stream", name="stdout", text="3"),
nbformat.new_output(output_type="stream", name="stdout", text="4"),
nbformat.new_output(output_type="stream", name="stdout", text="5"),
nbformat.new_output(output_type="stream", name="stdout", text="6"),
nbformat.new_output(output_type="stream", name="stdout", text="7"),
]
)
self.assertEqual(outputs[0].text, "01234567")

def test_coalesce_replace_streams(self):
"""Are \\r characters handled?"""
outputs = [
nbformat.new_output(output_type="stream", name="stdout", text="z"),
nbformat.new_output(output_type="stream", name="stdout", text="\ra"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz"),
nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"),
nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"),
]
cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]

nb = nbformat.new_notebook(cells=cells)
res = self.build_resources()
nb, res = coalesce_streams(nb, res)
outputs = nb.cells[0].outputs
outputs = self.process_outputs(
[
nbformat.new_output(output_type="stream", name="stdout", text="z"),
nbformat.new_output(output_type="stream", name="stdout", text="\ra"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz"),
nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"),
nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"),
]
)
self.assertEqual(outputs[0].text, "a\nb\nc\nd")

0 comments on commit 8fb5d4c

Please sign in to comment.