Skip to content

Commit

Permalink
minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
georgiewellock committed Jul 10, 2019
1 parent 116b8b3 commit 7c7c1a7
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 9 deletions.
2 changes: 1 addition & 1 deletion build/lib/vttformatter/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.03'
__version__ = '2.10'
17 changes: 13 additions & 4 deletions build/lib/vttformatter/vttformatter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
__version__ = '1.0'
import numpy as np
import os
import re
Expand Down Expand Up @@ -118,14 +117,15 @@ def format_text(self):
#initialise a counter to run while it remains less than the length of the message list
i=0
while i < len(part_messages)-2:
#print(i, flush = True)
#check to see if the start and stop times for subsequent messages are the same, if not append the message to full_messages and increase the counter to check the next line
if x[0,i+1] != x[1,i]:
full_messages.append(x[2,i])
i+=1
#if the start and stop times are the same initialise an empty string and loop over messages from that point and append them to the string until the start and stop times are no longer consistent
else:
sentence = ''
while x[0,i+1] == x[1,i]:
while x[0,i+1] == x[1,i] and i+1 < len(part_messages)-2:
sentence = sentence + x[2,i] + ' '
i+=1
sentence = sentence + x[2,i]
Expand All @@ -135,12 +135,21 @@ def format_text(self):
#check the last 2 elements of the partial message list and append them to full_messages
if x[0,-1] == x[1,-2]:
end = x[2,-2] + ' ' + x[2,-1]
full_messages.append(end)
if x[1,-2] == x[1,-3]:
full_messages[-1] = full_messages[-1] + ' ' + end
else:
full_messages.append(end)

elif x[1,-2] == x[1,-3]:
full_messages[-1] = full_messages[-1] + ' ' + x[2,-2]
full_messages.append(x[2,-1])

else:
full_messages.append(x[2,-2])
full_messages.append(x[2,-1])
#return the list with all the fully combined messages
return part_messages, full_messages
self.full_messages = full_messages
return part_messages, self.full_messages

def reformat_vtt(self):
"""create a new .txt file with the same nane as the original .vtt and write each line in the list containing full messages to the file separated by a blank line. """
Expand Down
Binary file added dist/vttformatter-2.0-py3-none-any.whl
Binary file not shown.
Binary file added dist/vttformatter-2.0.tar.gz
Binary file not shown.
Binary file added dist/vttformatter-2.10-py3-none-any.whl
Binary file not shown.
Binary file added dist/vttformatter-2.10.tar.gz
Binary file not shown.
15 changes: 13 additions & 2 deletions vttformatter.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
Metadata-Version: 2.1
Name: vttformatter
Version: 1.3
Version: 2.10
Summary: WEBVTT to text converter
Home-page: https://github.com/georgiewellock/VTT_formatter
Author: Georgina L. Wellock
Author-email: [email protected]
License: MIT
Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/1.03.tar.gz
Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/2.10.tar.gz
Description: # VttFormatter

Converts WebVTT files into plain text, removing timestamps and identifiers and formatting the text into paragraphs.

`VTT_formatter` is a Python package that can be run with Python from the command line or through an interface such as a [Jupyter Notebook](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html), either locally on your machine or using [Azure Notebooks](https://notebooks.azure.com/#).

Full instructions on using `VTT_formatter` in a Jupyter Notebook, either on Azure Notebooks or locally using Anaconda, can be found on the [wiki](https://github.com/georgiewellock/VTT_formatter/wiki/VTT_formatter-using-Jupyter-Notebooks).

## Example Input/Output

### Input
Expand Down Expand Up @@ -76,6 +80,13 @@ Description: # VttFormatter
it is crackling. It will still be recording the audio.
```

## Simple usage

The screenshot below shows a simple use of the VTT formatter in a Jupyter notebook. It reads in the specified file and creates a new `.txt` file in the same directory as the original.
<img src="https://github.com/georgiewellock/VTT_formatter/blob/master/figures/example_usage.png"/>

Further information can be found in the [example notebook](https://github.com/georgiewellock/VTT_formatter/blob/master/VTT_formatter.ipynb).

## Installation

The simplest way to install `vttformatter` is to use `pip` to install it from [PyPI](https://pypi.org/project/vttformatter/)
Expand Down
11 changes: 9 additions & 2 deletions vttformatter/vttformatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ def format_text(self):
else:
sentence = ''
while x[0,i+1] == x[1,i] and i+1 < len(part_messages)-2:
print(i, flush = True)
sentence = sentence + x[2,i] + ' '
i+=1
sentence = sentence + x[2,i]
Expand All @@ -136,7 +135,15 @@ def format_text(self):
#check the last 2 elements of the partial message list and append them to full_messages
if x[0,-1] == x[1,-2]:
end = x[2,-2] + ' ' + x[2,-1]
full_messages.append(end)
if x[1,-2] == x[1,-3]:
full_messages[-1] = full_messages[-1] + ' ' + end
else:
full_messages.append(end)

elif x[1,-2] == x[1,-3]:
full_messages[-1] = full_messages[-1] + ' ' + x[2,-2]
full_messages.append(x[2,-1])

else:
full_messages.append(x[2,-2])
full_messages.append(x[2,-1])
Expand Down

0 comments on commit 7c7c1a7

Please sign in to comment.