From 678459bc35a19d4379bf82268162ddfd8b23f509 Mon Sep 17 00:00:00 2001 From: yoshikazusawa <883514+yoshikazusawa@users.noreply.github.com> Date: Mon, 10 Jun 2024 00:23:15 +0900 Subject: [PATCH] Supports additional indicators on block scalars ref: https://yaml-multiline.info/ --- lib/TAP/Parser/YAMLish/Reader.pm | 76 +++++++++++++++++++++++------- t/yamlish.t | 80 ++++++++++++++++++++++++++++++-- 2 files changed, 135 insertions(+), 21 deletions(-) diff --git a/lib/TAP/Parser/YAMLish/Reader.pm b/lib/TAP/Parser/YAMLish/Reader.pm index fb823784..6f09de9d 100644 --- a/lib/TAP/Parser/YAMLish/Reader.pm +++ b/lib/TAP/Parser/YAMLish/Reader.pm @@ -137,23 +137,9 @@ sub _read_scalar { return {} if $string eq '{}'; return [] if $string eq '[]'; - if ( $string eq '>' || $string eq '|' ) { - - my ( $line, $indent ) = $self->_peek; - die "Multi-line scalar content missing" unless defined $line; - - my @multiline = ($line); - - while (1) { - $self->_next; - my ( $next, $ind ) = $self->_peek; - last if $ind < $indent; - - my $pad = $string eq '|' ? ( ' ' x ( $ind - $indent ) ) : ''; - push @multiline, $pad . $next; - } - - return join( ( $string eq '>' ? ' ' : "\n" ), @multiline ) . "\n"; + if ( $string =~ /^([>|])([+-]?)([1-9]?)$/ ) { + my ( $style, $chomping, $indent_base ) = ( $1, $2, $3 ); + return $self->_read_block_scalar( $style, $chomping, $indent_base ); } if ( $string =~ /^ ' (.*) ' $/x ) { @@ -175,6 +161,62 @@ sub _read_scalar { return $string; } +sub _read_block_scalar { + my ( $self, $style, $chomping, $indent_base ) = @_; + + my ( $line, $line_indent ) = $self->_peek; + die "Multi-line scalar content missing" unless defined $line; + + $indent_base ||= $line_indent; + + my $pad = ' ' x ( $line_indent - $indent_base ); + my @multi_lines = [ $pad, $line ]; + + while (1) { + $self->_next; + my ( $content, $line_indent ) = $self->_peek; + last if $line_indent < $indent_base; + + my $pad = ' ' x ( $line_indent - $indent_base ); + push @multi_lines, [ $pad, $content ]; + } + + my $block = ''; + my $previous_line; + + for my $current_line ( @multi_lines ) { + my ( $pad, $content ) = @$current_line; + unless ( defined $previous_line ) { + $block .= join( '', $pad, $content ); + $previous_line = $current_line; + next; + } + + if ( $style eq '>' + && length $content > 0 + && length $pad == 0 + && length $previous_line->[0] == 0 + && length $previous_line->[1] > 0 ) + { + $block .= ' ' . $content; + } else { + $block .= join( '', "\n", $pad, $content ); + } + $previous_line = $current_line; + } + + $block .= "\n"; + + if ( $chomping eq '-' ) { + $block =~ s/\n+$//; + } elsif ( $chomping eq '+' ) { + # noop: keep newlines + } else { + $block =~ s/\n+$/\n/; + } + return $block; +} + sub _read_nested { my $self = shift; diff --git a/t/yamlish.t b/t/yamlish.t index 07c36838..07bdb01b 100644 --- a/t/yamlish.t +++ b/t/yamlish.t @@ -40,16 +40,50 @@ BEGIN { ], out => "Hello, World", }, - { name => 'Hello World 4', + { name => 'Block Scalars with folded style', in => [ '--- >', ' Hello,', - ' World', + ' World', '...', ], out => "Hello, World\n", }, - { name => 'Hello World Block', + { name => 'Block Scalars with folded style including extra indentation', + in => [ + '--- >', + ' Hello,', + ' World', + ' with extra', + ' indentation', + '...', + ], + out => "Hello, World\n with extra\n indentation\n", + }, + { name => 'Hello World with folded style and no newline at end', + in => [ + '--- >-', + ' Hello,', + ' World', + ' ', + '...', + ], + out => "Hello, World", + }, + { name => 'Hello World with folded style and keeping newlines from end', + in => [ + '--- >+', + ' Hello,', + ' World', + ' ', + ' ', + ' Hello World again', + ' ', + '...', + ], + out => "Hello, World\n \n\nHello World again\n\n", + }, + { name => 'Hello World with literal style', in => [ '--- |', ' Hello,', @@ -58,7 +92,45 @@ BEGIN { ], out => "Hello,\n World\n", }, - { name => 'Hello World 5', + { name => 'Hello World with literal style and no newline at end', + in => [ + '--- |-', + ' Hello,', + ' World', + '...', + ], + out => "Hello,\n World", + }, + { name => 'Hello World with literal style and keeping newlines from end', + in => [ + '--- |+', + ' Hello,', + ' World', + ' ', + ' ', + '...', + ], + out => "Hello,\n World\n\n \n", + }, + { name => 'Hello World with literal style and no newline at end and indentation indicator', + in => [ + '--- |-3', + ' Hello,', + ' World', + '...', + ], + out => " Hello,\n World", + }, + { name => 'Hello World with indentation indicator', + in => [ + '--- >1', + ' Hello,', + ' World', + '...', + ], + out => " Hello,\n World\n", + }, + { name => 'Hello World with broken indentation', in => [ '--- >', ' Hello,',