Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert coalesce_streams function to CoalesceStreamsPreprocessor #2089

Merged
merged 1 commit into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/source/api/preprocessors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ Metadata and header control

.. autoclass:: CSSHTMLHeaderPreprocessor

Removing cells, inputs, and outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Removing/Manipulating cells, inputs, and outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: ClearOutputPreprocessor

.. autoclass:: CoalesceStreamsPreprocessor

.. autoclass:: RegexRemovePreprocessor

.. autoclass:: TagRemovePreprocessor
Expand All @@ -59,5 +61,3 @@ Executing Notebooks
:members:

.. autoclass:: CellExecutionError

.. autofunction:: coalesce_streams
2 changes: 1 addition & 1 deletion nbconvert/exporters/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ class Exporter(LoggingConfigurable):
"nbconvert.preprocessors.TagRemovePreprocessor",
"nbconvert.preprocessors.RegexRemovePreprocessor",
"nbconvert.preprocessors.ClearOutputPreprocessor",
"nbconvert.preprocessors.CoalesceStreamsPreprocessor",
"nbconvert.preprocessors.ExecutePreprocessor",
"nbconvert.preprocessors.coalesce_streams",
"nbconvert.preprocessors.SVG2PDFPreprocessor",
"nbconvert.preprocessors.LatexPreprocessor",
"nbconvert.preprocessors.HighlightMagicsPreprocessor",
Expand Down
8 changes: 8 additions & 0 deletions nbconvert/nbconvertapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ def validate(self, obj, value):
"""Clear output of current file and save in place,
overwriting the existing notebook. """,
),
"coalesce-streams": (
{
"NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
"FilesWriter": {"build_directory": ""},
"CoalesceStreamsPreprocessor": {"enabled": True},
},
"""Coalesce consecutive stdout and stderr outputs into one stream (within each cell).""",
),
"no-prompt": (
{
"TemplateExporter": {
Expand Down
5 changes: 2 additions & 3 deletions nbconvert/preprocessors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
from .base import Preprocessor
from .clearmetadata import ClearMetadataPreprocessor
from .clearoutput import ClearOutputPreprocessor

# decorated function Preprocessors
from .coalescestreams import coalesce_streams
from .coalescestreams import CoalesceStreamsPreprocessor
from .convertfigures import ConvertFiguresPreprocessor
from .csshtmlheader import CSSHTMLHeaderPreprocessor
from .execute import ExecutePreprocessor
Expand All @@ -24,6 +22,7 @@
"Preprocessor",
"ClearMetadataPreprocessor",
"ClearOutputPreprocessor",
"CoalesceStreamsPreprocessor",
"ConvertFiguresPreprocessor",
"CSSHTMLHeaderPreprocessor",
"ExecutePreprocessor",
Expand Down
99 changes: 31 additions & 68 deletions nbconvert/preprocessors/coalescestreams.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,44 @@
"""Preprocessor for merging consecutive stream outputs for easier handling."""
import re

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from nbconvert.preprocessors import Preprocessor

import functools
import re

from traitlets.log import get_logger


def cell_preprocessor(function):
    """
    Turn a per-cell function into a whole-notebook preprocessor.

    The decorated callable is invoked once per cell, in notebook order, and
    must accept these parameters:

    cell : NotebookNode cell
        Notebook cell being processed
    resources : dictionary
        Additional resources used in the conversion process.  Allows
        preprocessors to pass variables into the Jinja engine.
    index : int
        Index of the cell being processed

    It must return a ``(cell, resources)`` pair.  The returned cell replaces
    the original at the same index, and the returned resources are passed on
    to the call for the next cell.  The resulting wrapper takes
    ``(nb, resources)`` and returns ``(nb, resources)``.
    """

    def apply_to_every_cell(nb, resources):
        # Log under the name of the user-supplied function, not the wrapper.
        get_logger().debug("Applying preprocessor: %s", function.__name__)
        for position, current_cell in enumerate(nb.cells):
            processed_cell, resources = function(current_cell, resources, position)
            nb.cells[position] = processed_cell
        return nb, resources

    # Copy name/docstring metadata from the wrapped function onto the wrapper.
    return functools.wraps(function)(apply_to_every_cell)

CR_PAT = re.compile(r".*\r(?=[^\n])")

cr_pat = re.compile(r".*\r(?=[^\n])")


@cell_preprocessor
def coalesce_streams(cell, resources, index):
class CoalesceStreamsPreprocessor(Preprocessor):
"""
Merge consecutive sequences of stream output into single stream
to prevent extra newlines inserted at flush calls

Parameters
----------
cell : NotebookNode cell
Notebook cell being processed
resources : dictionary
Additional resources used in the conversion process. Allows
transformers to pass variables into the Jinja engine.
index : int
Index of the cell being processed
"""

outputs = cell.get("outputs", [])
if not outputs:
def preprocess_cell(self, cell, resources, cell_index):
"""
Apply a transformation on each cell. See base.py for details.
"""
outputs = cell.get("outputs", [])
if not outputs:
return cell, resources

last = outputs[0]
new_outputs = [last]
for output in outputs[1:]:
if (
output.output_type == "stream"
and last.output_type == "stream"
and last.name == output.name
):
last.text += output.text
else:
new_outputs.append(output)
last = output

# process \r characters
for output in new_outputs:
if output.output_type == "stream" and "\r" in output.text:
output.text = CR_PAT.sub("", output.text)

cell.outputs = new_outputs
return cell, resources

last = outputs[0]
new_outputs = [last]
for output in outputs[1:]:
if (
output.output_type == "stream"
and last.output_type == "stream"
and last.name == output.name
):
last.text += output.text

else:
new_outputs.append(output)
last = output

# process \r characters
for output in new_outputs:
if output.output_type == "stream" and "\r" in output.text:
output.text = cr_pat.sub("", output.text)

cell.outputs = new_outputs
return cell, resources
79 changes: 44 additions & 35 deletions tests/preprocessors/test_coalescestreams.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,68 @@

from nbformat import v4 as nbformat

from nbconvert.preprocessors.coalescestreams import coalesce_streams
from nbconvert.preprocessors.coalescestreams import CoalesceStreamsPreprocessor

from .base import PreprocessorTestsBase


class TestCoalesceStreams(PreprocessorTestsBase):
"""Contains test functions for coalescestreams.py"""

def build_preprocessor(self):
    """Return a fresh CoalesceStreamsPreprocessor with ``enabled`` set to True."""
    coalescer = CoalesceStreamsPreprocessor()
    coalescer.enabled = True
    return coalescer

def test_constructor(self):
    """Building a CoalesceStreamsPreprocessor should complete without raising."""
    self.build_preprocessor()

def process_outputs(self, outputs):
    """Run the preprocessor over a one-cell notebook holding *outputs*.

    A single code cell carrying the given outputs is wrapped in a new
    notebook, the preprocessor is applied to it, and the outputs of that
    cell after preprocessing are returned.
    """
    code_cell = nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)
    notebook = nbformat.new_notebook(cells=[code_cell])
    resources = self.build_resources()
    coalescer = self.build_preprocessor()
    notebook, resources = coalescer(notebook, resources)
    first_cell = notebook.cells[0]
    return first_cell.outputs

def test_coalesce_streams(self):
"""coalesce_streams preprocessor output test"""
"""Test the output of a CoalesceStreamsPreprocessor"""
nb = self.build_notebook()
res = self.build_resources()
nb, res = coalesce_streams(nb, res)
outputs = nb.cells[0].outputs
outputs = self.process_outputs(nb.cells[0].outputs)
self.assertEqual(outputs[0].text, "a")
self.assertEqual(outputs[1].output_type, "display_data")
self.assertEqual(outputs[2].text, "cd")
self.assertEqual(outputs[3].text, "ef")

def test_coalesce_sequenced_streams(self):
"""Can the coalesce streams preprocessor merge a sequence of streams?"""
outputs = [
nbformat.new_output(output_type="stream", name="stdout", text="0"),
nbformat.new_output(output_type="stream", name="stdout", text="1"),
nbformat.new_output(output_type="stream", name="stdout", text="2"),
nbformat.new_output(output_type="stream", name="stdout", text="3"),
nbformat.new_output(output_type="stream", name="stdout", text="4"),
nbformat.new_output(output_type="stream", name="stdout", text="5"),
nbformat.new_output(output_type="stream", name="stdout", text="6"),
nbformat.new_output(output_type="stream", name="stdout", text="7"),
]
cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]

nb = nbformat.new_notebook(cells=cells)
res = self.build_resources()
nb, res = coalesce_streams(nb, res)
outputs = nb.cells[0].outputs
outputs = self.process_outputs(
[
nbformat.new_output(output_type="stream", name="stdout", text="0"),
nbformat.new_output(output_type="stream", name="stdout", text="1"),
nbformat.new_output(output_type="stream", name="stdout", text="2"),
nbformat.new_output(output_type="stream", name="stdout", text="3"),
nbformat.new_output(output_type="stream", name="stdout", text="4"),
nbformat.new_output(output_type="stream", name="stdout", text="5"),
nbformat.new_output(output_type="stream", name="stdout", text="6"),
nbformat.new_output(output_type="stream", name="stdout", text="7"),
]
)
self.assertEqual(outputs[0].text, "01234567")

def test_coalesce_replace_streams(self):
"""Are \\r characters handled?"""
outputs = [
nbformat.new_output(output_type="stream", name="stdout", text="z"),
nbformat.new_output(output_type="stream", name="stdout", text="\ra"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz"),
nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"),
nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"),
]
cells = [nbformat.new_code_cell(source="# None", execution_count=1, outputs=outputs)]

nb = nbformat.new_notebook(cells=cells)
res = self.build_resources()
nb, res = coalesce_streams(nb, res)
outputs = nb.cells[0].outputs
outputs = self.process_outputs(
[
nbformat.new_output(output_type="stream", name="stdout", text="z"),
nbformat.new_output(output_type="stream", name="stdout", text="\ra"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz\rb"),
nbformat.new_output(output_type="stream", name="stdout", text="\nz"),
nbformat.new_output(output_type="stream", name="stdout", text="\rc\n"),
nbformat.new_output(output_type="stream", name="stdout", text="z\rz\rd"),
]
)
self.assertEqual(outputs[0].text, "a\nb\nc\nd")
Loading