Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added speakers to internal_format #85

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,18 @@ Add subtitles
```php
$subtitles->add(0, 5, 'some text'); // from 0, till 5 seconds

// Add text with speakers
// Only the VTT and SRT file formats write the speaker into the output lines
// For SRT the output looks like: speaker1: some text
// For VTT the output looks like: <v speaker1>some text</v>
$subtitles->add(0, 5, [
'speaker1' => 'some text',
]);

// Add multiline text
$subtitles->add(0, 5, [
'first line',
'second line',
'speaker2' => 'second line',
]);

// Add styles to VTT file format
Expand Down
60 changes: 56 additions & 4 deletions src/Code/Converters/SrtConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public function fileContentToInternalFormat($file_content)
$internal_format[$i]['start'] = self::srtTimeToInternal($parts['start'], $next_line);
$internal_format[$i]['end'] = self::srtTimeToInternal($parts['end'], $next_line);
$internal_format[$i]['lines'] = [];

$internal_format[$i]['speakers'] = [];

// remove number before timestamp
if (isset($internal_format[$i - 1])) {
Expand All @@ -49,7 +49,9 @@ public function fileContentToInternalFormat($file_content)
} elseif ($parts['start'] && !$parts['end'] && strpos($line, '-->') !== false) {
throw new UserException("Something is wrong with timestamps on this line: " . $line);
} elseif ($parts['text']) {
$internal_format[$i]['lines'][] = strip_tags($parts['text']);
list($linePart, $speakerPart) = self::fixLine($parts['text']);
$internal_format[$i]['lines'][] = $linePart;
$internal_format[$i]['speakers'][] = $speakerPart;
}

if ($parts['start'] && $parts['end']) {
Expand All @@ -59,7 +61,30 @@ public function fileContentToInternalFormat($file_content)
$internal_format = []; // skip words in front of srt subtitle (invalid subtitles)
}
}

// Clean up speakers: drop trailing empty entries, for example ['speaker1', null, null] becomes ['speaker1']
foreach ($internal_format as $key => $internal_format_parts) {
if (isset($internal_format_parts['speakers'])) {
$hasSpeaker = false;
$empty_speakers_keys = [];
// Check speakers if they are null
foreach ($internal_format_parts['speakers'] as $speaker_key => $speaker) {
if ($speaker) {
$empty_speakers_keys = [];
$hasSpeaker = true;
} else {
$empty_speakers_keys[] = $speaker_key;
}
}
// Remove speakers key for that time if all speakers are empty or remove empty ones
if (!$hasSpeaker) {
unset($internal_format[$key]['speakers']);
} elseif ($empty_speakers_keys) {
foreach ($empty_speakers_keys as $empty_speaker_key) {
unset($internal_format[$key]['speakers'][$empty_speaker_key]);
}
}
}
}
return $internal_format;
}

Expand All @@ -77,7 +102,19 @@ public function internalFormatToFileContent(array $internal_format)
$nr = $k + 1;
$start = static::internalTimeToSrt($block['start']);
$end = static::internalTimeToSrt($block['end']);
$lines = implode("\r\n", $block['lines']);
$lines_array = [];
foreach ($block['lines'] as $key => $line) {
$speaker = '';
// if speakers is set
if (isset($block['speakers'][$key]) && $block['speakers'][$key]) {
// create speaker:
$speaker = $block['speakers'][$key] . ': ';
$lines_array[] = $speaker . $line;
} else {
$lines_array[] = $line;
}
}
$lines = implode("\r\n", $lines_array);

$file_content .= $nr . "\r\n";
$file_content .= $start . ' --> ' . $end . "\r\n";
Expand Down Expand Up @@ -132,4 +169,19 @@ public static function internalTimeToSrt($internal_time)

return sprintf("%02d:%02d:%02d,%03d", $hours, $minutes, $remaining_seconds, $milliseconds);
}

/**
 * Split one SRT text line into its text and an optional speaker name.
 *
 * A line written as "speaker: text" is split on the first ": ": everything
 * before it becomes the speaker and everything after it becomes the text.
 * A line without ": " is returned unchanged with a null speaker.
 * HTML/markup tags are removed before splitting, so neither the text nor
 * the speaker name carries tags.
 *
 * @param string $line raw text line taken from the SRT block
 * @return array two-element list: [string $line, string|null $speaker]
 */
protected static function fixLine($line)
{
    // Strip markup first; otherwise a tag wrapping the whole line
    // (e.g. "<i>Bob: hi</i>") would end up inside the speaker name.
    $line = strip_tags($line);

    $speaker = null;
    // The first group is lazy so the split happens at the FIRST ": ".
    // A greedy group would split at the last one and swallow part of the
    // text into the speaker name (e.g. "Bob: it is: late").
    if (preg_match('/(.*?): (.*)/', $line, $matches)) {
        $speaker = $matches[1];
        $line = $matches[2];
    }

    return array($line, $speaker);
}
}
60 changes: 51 additions & 9 deletions src/Code/Converters/VttConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public function fileContentToInternalFormat($file_content)
$internal_format[$i]['start'] = self::vttTimeToInternal($parts['start']);
$internal_format[$i]['end'] = self::vttTimeToInternal($parts['end']);
$internal_format[$i]['lines'] = [];
$internal_format[$i]['speakers'] = [];

// styles
preg_match('/((?:\d{1,2}:){1,2}\d{2}\.\d{1,3})\s+-->\s+((?:\d{1,2}:){1,2}\d{2}\.\d{1,3}) *(.*)/', $line, $matches);
Expand All @@ -54,23 +55,59 @@ public function fileContentToInternalFormat($file_content)
}
}
} elseif ($parts['text']) {
$internal_format[$i]['lines'][] = self::fixLine($parts['text']);
list($linePart, $speakerPart) = self::fixLine($parts['text']);
$internal_format[$i]['lines'][] = $linePart;
$internal_format[$i]['speakers'][] = $speakerPart;
}

$last_line_was_empty = trim($line) === '';
}

// cleanup speakers, for example ['speaker1', null, null] to ['speaker1']
foreach ($internal_format as $key => $internal_format_parts) {
if (isset($internal_format_parts['speakers'])) {
$hasSpeaker = false;
$empty_speakers_keys = [];
// Check speakers if they are null
foreach ($internal_format_parts['speakers'] as $speaker_key => $speaker) {
if ($speaker) {
$empty_speakers_keys = [];
$hasSpeaker = true;
} else {
$empty_speakers_keys[] = $speaker_key;
}
}
// Remove speakers key for that time if all speakers are empty or remove empty ones
if (!$hasSpeaker) {
unset($internal_format[$key]['speakers']);
} elseif ($empty_speakers_keys) {
foreach ($empty_speakers_keys as $empty_speaker_key) {
unset($internal_format[$key]['speakers'][$empty_speaker_key]);
}
}
}
}
return $internal_format;
}

public function internalFormatToFileContent(array $internal_format)
{
$file_content = "WEBVTT\r\n\r\n";

foreach ($internal_format as $k => $block) {
$start = static::internalTimeToVtt($block['start']);
$end = static::internalTimeToVtt($block['end']);
$lines = implode("\r\n", $block['lines']);
$lines_array = [];
foreach ($block['lines'] as $key => $line) {
$speaker = '';
// if speakers is set
if (isset($block['speakers'][$key]) && $block['speakers'][$key]) {
// create <v speaker>Line</v>
$speaker = '<v ' . $block['speakers'][$key] . '>';
$lines_array[] = $speaker . $line . '</v>';
} else {
$lines_array[] = $line;
}
}
$lines = implode("\r\n", $lines_array);

$vtt_cue_settings = '';
if (isset($block['vtt_cue_settings'])) {
Expand All @@ -92,9 +129,9 @@ protected static function vttTimeToInternal($vtt_time)
{
$corrected_time = str_replace(',', '.', $vtt_time);
$parts = explode('.', $corrected_time);

// parts[0] could be mm:ss or hh:mm:ss format -> always use hh:mm:ss
$parts[0] = substr_count($parts[0], ':') == 2 ? $parts[0] : '00:'.$parts[0];
$parts[0] = substr_count($parts[0], ':') == 2 ? $parts[0] : '00:' . $parts[0];

$only_seconds = strtotime("1970-01-01 {$parts[0]} UTC");
$milliseconds = (float)('0.' . $parts[1]);
Expand All @@ -117,15 +154,20 @@ protected static function internalTimeToVtt($internal_time)

/**
 * Split one VTT text line into its text and an optional speaker name.
 *
 * A line that starts with a voice tag ("<v speaker>text</v>") yields the
 * tag's annotation as the speaker and the remainder as the text. Any other
 * line is returned with a null speaker. Remaining tags (including the
 * closing "</v>") are stripped from the text.
 *
 * @param string $line raw text line taken from the VTT cue
 * @return array two-element list: [string $line, string|null $speaker]
 */
protected static function fixLine($line)
{
    $speaker = null;

    if (substr($line, 0, 3) === '<v ') {
        $tag_end = strpos($line, '>');
        // Only treat it as a voice tag when the opening tag is actually closed
        if ($tag_end !== false) {
            // The name sits between "<v " (3 characters) and the first ">"
            $name = trim(substr($line, 3, $tag_end - 3));
            if ($name !== '') {
                $speaker = $name;
            }
            // Cut by position so only the leading tag is removed; a
            // search-and-replace would also delete matching text further on
            $line = substr($line, $tag_end + 1);
        }
    }

    // html
    $line = strip_tags($line);

    // Callers destructure this as list($line, $speaker)
    return array($line, $speaker);
}
}
32 changes: 29 additions & 3 deletions src/Subtitles.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,40 @@ public function add($start, $end, $text, $settings = [])
$internal_format = [
'start' => $start,
'end' => $end,
'lines' => is_array($text) ? $text : [$text],
'lines' => is_array($text) ? array_values($text) : [$text], // array_values removes possible speakers from keys
];

$speakers = [];
// Check if $text is an array containing speakers
if (is_array($text) && !array_is_list($text)) {
foreach ($text as $key => $textItem) {
$speakers[] = $key;
}
// Clean up $speakers: remove all empty items at the end of the array, so ['speaker1', null, null] becomes ['speaker1']
$empty_speakers_keys = [];
foreach ($speakers as $key => $speaker) {
if ($speaker && !is_int($speaker)) {
$empty_speakers_keys = [];
} else {
$empty_speakers_keys[] = $key;
}
}
if ($empty_speakers_keys) {
foreach ($empty_speakers_keys as $empty_speaker_key) {
unset($speakers[$empty_speaker_key]);
}
}
}

if (isset($settings['vtt_cue_settings']) && $settings['vtt_cue_settings']) {
$internal_format['vtt_cue_settings'] = $settings['vtt_cue_settings'];
}

// Finally push speakers to internal format if there is any
if ($speakers) {
$internal_format['speakers'] = $speakers;
}

$this->internal_format[] = $internal_format;
$this->sortInternalFormat();

Expand Down Expand Up @@ -283,7 +310,6 @@ public static function loadFromString($string, $format = null)
$internal_format[$i] = $row;
$i++;
}

}

// fix up to a 60 seconds time overlap
Expand Down Expand Up @@ -338,4 +364,4 @@ public static function loadFromString($string, $format = null)

return $converter;
}
}
}
25 changes: 25 additions & 0 deletions test_helpers/AdditionalAssertionsTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,31 @@ public function assertInternalFormatsEqual($expected, $actual, $allowable_error
foreach ($expected[$k]['lines'] as $line_k => $line) {
$this->assertEquals($line, $actual[$k]['lines'][$line_k]);
}

// We should check if speakers is supported for format first.
if (isset($expected[$k]['speakers']) || isset($actual[$k]['speakers'])) {
// Check if speakers key is available for both expected and actual
$this->assertArrayHasKey('speakers', $expected[$k], "Expected Array doesn't contains 'speakers' as key");
$this->assertArrayHasKey('speakers', $actual[$k], "Actual Array doesn't contains 'speakers' as key");
// Compare expected and actual
if (isset($expected[$k]['speakers']) && isset($actual[$k]['speakers'])) {
$this->assertEquals(count($expected[$k]['speakers']), count($actual[$k]['speakers']), 'Speaker count is different');
foreach ($expected[$k]['speakers'] as $speaker_k => $speaker) {
$this->assertEquals($speaker, $actual[$k]['speakers'][$speaker_k]);
}
}
}

// We should check if vtt_cue_settings is supported for format first.
if (isset($expected[$k]['vtt_cue_settings']) || isset($actual[$k]['vtt_cue_settings'])) {
// Check if vtt_cue_settings key is available for both expected and actual
$this->assertArrayHasKey('vtt_cue_settings', $expected[$k], "Expected Array doesn't contains 'vtt_cue_settings' as key");
$this->assertArrayHasKey('vtt_cue_settings', $actual[$k], "Actual Array doesn't contains 'vtt_cue_settings' as key");
// Compare expected and actual
if (isset($expected[$k]['vtt_cue_settings']) && isset($actual[$k]['vtt_cue_settings'])) {
$this->assertEquals($expected[$k]['vtt_cue_settings'], $actual[$k]['vtt_cue_settings'], 'vtt_cue_settings is different');
}
}
}
}

Expand Down
19 changes: 19 additions & 0 deletions tests/files/srt_to_vtt.srt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
1
00:02:17,400 --> 00:02:20,400
line 1
speaker2: line 2

2
01:02:20,500 --> 01:02:22,500
speaker1: line 3
line 4

3
02:02:20,500 --> 02:02:22,500
speaker2: line 5
speaker1: line 6

4
03:02:20,500 --> 03:02:22,500
line 7
line 8
17 changes: 17 additions & 0 deletions tests/files/srt_to_vtt.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
WEBVTT

00:02:17.400 --> 00:02:20.400
line 1
<v speaker2>line 2</v>

01:02:20.500 --> 01:02:22.500
<v speaker1>line 3</v>
line 4

02:02:20.500 --> 02:02:22.500
<v speaker2>line 5</v>
<v speaker1>line 6</v>

03:02:20.500 --> 03:02:22.500
line 7
line 8
27 changes: 27 additions & 0 deletions tests/files/srt_with_name.srt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
1
00:00:09,000 --> 00:00:11,000
speaker1: Line 1

2
00:00:12,000 --> 00:00:13,000
Line 2

3
00:00:14,000 --> 00:00:15,000
speaker1: Line 3
speaker2: Line 4

4
00:00:16,000 --> 00:00:17,000
Line 5
Line 6

5
00:00:18,000 --> 00:00:19,000
speaker1: Line 7
Line 8

6
00:00:20,000 --> 00:00:21,000
Line 9
speaker2: Line 10
5 changes: 5 additions & 0 deletions tests/files/vtt2.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
WEBVTT

Cue
00:00:00.400 --> 00:00:00.900 something
<v speaker1>a</v>
Loading