Skip to content

Commit

Permalink
updated userguide notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
georgiewellock committed Jul 8, 2019
1 parent 181148a commit d3c6baf
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 39 deletions.
118 changes: 85 additions & 33 deletions VTT_formatter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"metadata": {},
"outputs": [],
"source": [
"from vttformatter import VttFormatter"
"from vttformatter.vttformatter import VttFormatter"
]
},
{
Expand All @@ -15,6 +15,8 @@
"metadata": {},
"outputs": [],
"source": [
"# define the path to the original WEBVTT file\n",
"# the updated .txt file will be saved in the same directory as the original\n",
"filename = 'test_AutoGeneratedCaption.vtt'"
]
},
Expand All @@ -24,7 +26,9 @@
"metadata": {},
"outputs": [],
"source": [
"# create a VttFormatter object\n",
"vtt = VttFormatter(filename)\n",
"# use this method to automatically reformat the .vtt and save the new file\n",
"vtt.reformat_vtt()"
]
},
Expand All @@ -34,41 +38,89 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
"data": {
"text/plain": [
"{'messages': [{'confidence': '0.69450831413269',\n",
" 'marker': 'ef04c7c2-a59e-463f-9d27-b5b1259d6777',\n",
" 'start': '00:00:03.300',\n",
" 'stop': '00:00:06.870',\n",
" 'content': ['Sorry.']},\n",
" {'confidence': '0.621036410331726',\n",
" 'marker': '8a017ebb-1722-4e7f-8984-fc6da39c3489',\n",
" 'start': '00:00:08.100',\n",
" 'stop': '00:00:09.620',\n",
" 'content': ['Yes.']},\n",
" {'confidence': '0.713402450084686',\n",
" 'marker': 'd9a1567a-1ebe-40ce-983a-98436bcabcfe',\n",
" 'start': '00:00:19.240',\n",
" 'stop': '00:00:20.240',\n",
" 'content': ['Picking up your feet.']},\n",
" {'confidence': '0.536461710929871',\n",
" 'marker': 'b8e0fa64-8c2f-4070-9b0f-922a50f3fcde',\n",
" 'start': '00:00:21.930',\n",
" 'stop': '00:00:23.490',\n",
" 'content': ['Yeah.']},\n",
" {'confidence': '0.889019846916199',\n",
" 'marker': '88910870-8af9-48f5-bcc4-a501eda95d3f',\n",
" 'start': '00:00:24.670',\n",
" 'stop': '00:00:28.778',\n",
" 'content': ['But now my headphones or what',\n",
" 'the boy, I could it still yeah']},\n",
" {'confidence': '0.889019846916199',\n",
" 'marker': '7d633414-089b-4813-9617-9533f5f215c0',\n",
" 'start': '00:00:28.778',\n",
" 'stop': '00:00:32.570',\n",
" 'content': [\"well. I mean, it's it. It will\",\n",
" 'still be recording the audio.']},\n",
" {'confidence': '0.889019846916199',\n",
" 'marker': '3acc42cf-56d8-4d8e-938d-6b809966d6cf',\n",
" 'start': '00:00:32.570',\n",
" 'stop': '00:00:35.730',\n",
" 'content': [\"It's just not playing in\", 'through the speakers to see']},\n",
" {'confidence': '0.889019846916199',\n",
" 'marker': '8bb57fe4-63c7-4178-8c99-7ec7c17b1867',\n",
" 'start': '00:00:35.730',\n",
" 'stop': '00:00:39.206',\n",
" 'content': ['something in there, if you', 'click. The 3:00 dots. I saw']},\n",
" {'confidence': '0.889019846916199',\n",
" 'marker': '7bce27fa-6c7e-40e8-b94d-4def0c04d680',\n",
" 'start': '00:00:39.206',\n",
" 'stop': '00:00:42.366',\n",
" 'content': ['something that said turn off',\n",
" 'incoming video so that would']},\n",
" {'confidence': '0.889019846916199',\n",
" 'marker': 'f7327f7c-0534-47e9-a489-4d36af0b82ae',\n",
" 'start': '00:00:42.366',\n",
" 'stop': '00:00:42.998',\n",
" 'content': ['turn off.']},\n",
" {'confidence': '0.740396022796631',\n",
" 'marker': '7d21c661-cd53-4978-a3c4-e5fe669d5d1c',\n",
" 'start': '00:00:44.400',\n",
" 'stop': '00:00:49.953',\n",
" 'content': [\"10s of me, so I don't\", 'know if that.']},\n",
" {'confidence': '0.809084713459015',\n",
" 'marker': '1302ebc0-09a6-4196-b619-316fe067d58f',\n",
" 'start': '00:00:52.370',\n",
" 'stop': '00:00:59.318',\n",
" 'content': ['Yes, yeah, yeah, so you', 'can actually control how']},\n",
" {'confidence': '0.809084713459015',\n",
" 'marker': '73dd947f-3bff-4ee6-ab8d-2da524fcdb6c',\n",
" 'start': '00:00:59.318',\n",
" 'stop': '00:01:05.494',\n",
" 'content': ['you want the recording to', 'go you can.']}],\n",
" 'duration': '00:01:05.5380000\"',\n",
" 'language': 'en-us'}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"string = 'A1B2'\n",
"pattern = re.compile(\"^([A-Z][0-9]+)+$\")\n",
"if pattern.match(string):\n",
" print(True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"st = 'NOTE Confidence: 0.69450831413269'\n",
"s = st.split(':')[1].strip()\n",
"s\n",
"pattern = re.compile(r'[0-1]{1}.[0-9]{14}')\n",
"if pattern.match(s):\n",
" print(True)"
"# this code creates a dictionary for each element in the original file\n",
"# the dictionary is stored as an attribute which can be accessed\n",
"vtt.data_dict"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion build/lib/vttformatter/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.02'
__version__ = '1.03'
Binary file added dist/vttformatter-1.3-py3-none-any.whl
Binary file not shown.
Binary file added dist/vttformatter-1.3.tar.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions vttformatter.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
Metadata-Version: 2.1
Name: vttformatter
Version: 1.2
Version: 1.3
Summary: WEBVTT to text converter
Home-page: https://github.com/georgiewellock/VTT_formatter
Author: Georgina L. Wellock
Author-email: [email protected]
License: MIT
Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/1.02.tar.gz
Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/1.03.tar.gz
Description: # VttFormatter

Converts WEBVTT files into text removing timestamps and identifiers and formatting the text into paragraphs.
Expand Down
2 changes: 0 additions & 2 deletions vttformatter.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
numpy
re
ruamel.yaml
3 changes: 2 additions & 1 deletion vttformatter/vttformatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ def format_text(self):
full_messages.append(x[2,-2])
full_messages.append(x[2,-1])
#return the list with all the fully combined messages
return part_messages, full_messages
self.full_messages = full_messages
return part_messages, self.full_messages

def reformat_vtt(self):
"""create a new .txt file with the same name as the original .vtt and write each line in the list containing full messages to the file separated by a blank line. """
Expand Down

0 comments on commit d3c6baf

Please sign in to comment.