From 7c7c1a787e89af92538d4add993e71e57d6f6a40 Mon Sep 17 00:00:00 2001 From: Georgie Date: Wed, 10 Jul 2019 14:40:14 +0100 Subject: [PATCH] minor updates --- build/lib/vttformatter/__init__.py | 2 +- build/lib/vttformatter/vttformatter.py | 17 +++++++++++++---- dist/vttformatter-2.0-py3-none-any.whl | Bin 0 -> 5853 bytes dist/vttformatter-2.0.tar.gz | Bin 0 -> 4892 bytes dist/vttformatter-2.10-py3-none-any.whl | Bin 0 -> 6171 bytes dist/vttformatter-2.10.tar.gz | Bin 0 -> 5219 bytes vttformatter.egg-info/PKG-INFO | 15 +++++++++++++-- vttformatter/vttformatter.py | 11 +++++++++-- 8 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 dist/vttformatter-2.0-py3-none-any.whl create mode 100644 dist/vttformatter-2.0.tar.gz create mode 100644 dist/vttformatter-2.10-py3-none-any.whl create mode 100644 dist/vttformatter-2.10.tar.gz diff --git a/build/lib/vttformatter/__init__.py b/build/lib/vttformatter/__init__.py index 69ad71f..aeaa49b 100644 --- a/build/lib/vttformatter/__init__.py +++ b/build/lib/vttformatter/__init__.py @@ -1 +1 @@ -__version__ = '1.03' +__version__ = '2.10' diff --git a/build/lib/vttformatter/vttformatter.py b/build/lib/vttformatter/vttformatter.py index 553b299..8b06dba 100644 --- a/build/lib/vttformatter/vttformatter.py +++ b/build/lib/vttformatter/vttformatter.py @@ -1,4 +1,3 @@ -__version__ = '1.0' import numpy as np import os import re @@ -118,6 +117,7 @@ def format_text(self): #initialise a counter to run while it remains less than the length of the message list i=0 while i < len(part_messages)-2: + #print(i, flush = True) #check to see if the start and stop times for subsequent messages are the same, if not append the message to full_messages and increase the counter to check the next line if x[0,i+1] != x[1,i]: full_messages.append(x[2,i]) @@ -125,7 +125,7 @@ def format_text(self): #if the start and stop times are the same initialise an empty string and loop over messages from that point and append them to the string until the start and stop times are no longer consistent else: sentence = '' - while x[0,i+1] == x[1,i]: + while x[0,i+1] == x[1,i] and i+1 < len(part_messages)-2: sentence = sentence + x[2,i] + ' ' i+=1 sentence = sentence + x[2,i] @@ -135,12 +135,21 @@ def format_text(self): #check the last 2 elements of the partial message list and append them to full_messages if x[0,-1] == x[1,-2]: end = x[2,-2] + ' ' + x[2,-1] - full_messages.append(end) + if x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + end + else: + full_messages.append(end) + + elif x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + x[2,-2] + full_messages.append(x[2,-1]) + else: full_messages.append(x[2,-2]) full_messages.append(x[2,-1]) #return the list with all the fully combined messages - return part_messages, full_messages + self.full_messages = full_messages + return part_messages, self.full_messages def reformat_vtt(self): """create a new .txt file with the same nane as the original .vtt and write each line in the list containing full messages to the file separated by a blank line. """ diff --git a/dist/vttformatter-2.0-py3-none-any.whl b/dist/vttformatter-2.0-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..3cc51e4e5a2ca6ba79c1074791524e9b7571c8e7 GIT binary patch literal 5853 zcmaKwbx@S;`p1_gr6iZ`&RuG0>4qgGq@){`lu}ASq(d46Bofc$vg70`aMlNn^P@mlshOiur@K1s2^l!^7NlYCyT>x?3snY>EpGC6v8cI zR>39rbQ)#QhJbQxjIPu`YxT3$a8)!Oa+O!9$;NBdSEdIsbJKG0)sS#A>vfp(iT_|< z`G}{E_R;ghy~o;)={TY(CsEOopFR4aO)0(Fs0po~t7(iX8khZBVNsS>u5spyin z!1ZW7lN2x49r2`^`?^1li`Q;c&whN4;Rq4@>Q(pE3ZsK%NYmOUJWtPYY>PS1OeUx_ z`a8rKU*&plzK($Ii<8~?qSe?ptSrG4BnwtaRAQoV!}9y-zL0>zjiKNOvD@DPWw4pOi-uRkRCv{{DRij$DUJ)fcwUX|K^ zvU%TIOwU`^}rWSPcebV^TSqR+Z!FubLbZo$39%;gbpu0S4hci($K-y z(sipU7|2FewtLUv5_oID51r1oboMHZHyrG-^aA=Wz;NY7VVHas{8_c3Ql@-_)M95*P#AAZ%bTlUky(! z$ibQBvLchQ9w~-d);HMoXvb;<)IMf{B0f${Hx(L&_n{$8$Z3lqqnzQwGW@3tOXHY> zyEDBZD~5$~ZmK5J3lo}68tC~&`^V~DRn9^GTXJM+K5VeG2V?9N!WrJKaJYu!C^T>YIN69N0<4CF?A8g~>GI26=d{U}%=A=t6aVus~(-)Avcv67+ zle+%&e)JXnCHgB(b;|0vM7kb-!~k-4;p=&1{TW^T%+IY0x8#|_#kr-LE2=T(7mGPS zZ$N5KeKh@58pRjDaWCDITWWa==!?)d-LzQH8?3@54Z0*l8KYLNpr>L%?M(R<$lYo0FP1f#adxf{!rxQj4t;P9M zV?L;$vpQSw9J?qI^T%EpCx1}K32Jj`0|t)I5sjR%UfakgB}$6Md2|-?^HNrul=q{N zNr-EiDk-PQi34AHX+;8`*Ata+u4=o69bQ^`4`W!NhiQxN*_oGQFH){()^!l$joEuFkO|H^*FeaHOCfTQK-aD~!X|JbxMc7;n=W zv?XYQ5E-0^ilAXj5)sVM8PAgmGJ3%&FgjkO?eYq?rd$D&C)xuTTPdc>Pk%)6_X>Og zQo5mMVM!!CEi7NKv5YrvYZ#2j_r6>$mOIJ*L4!Wgm*0Qh60m&H2i4q;w8T~UX{snb zYFiF$29h>KJA+sXM2SG@!2^z1((Yy}onS~?sr7>6S#?Xs$$bIcD6`ZW4)b#-ZR<)S zAMl_kF0IHaRu+V+`pGg@iAIDzhraL^D$AU2xa(|(9}DYf4x zlH0JhfvQ!#r$2Pqm1E&7tKE-B7nKkkfXgC=oz}XH^+dC5#ilodOV8**t_>(nR3y7A z&tNlGeHq*6qV^9k1f#Yg`6q&UpbSLB;+kz&2wiYb-EkdmYa-%Znd)$ZtZAIC?#z+J9jpy{+;3IfIlk!h~(4 zILs;yV)0N8LwvruE08@mL5jLFIWqetTYxgf6W3V|VIIR-iM5b$IXN~O1?*?`vimbBxeN}rzZjP<_HSjxO>_4W%rdQ=XruoBU#C|Hq_6yryuD4p4 zqg^rfBqO(j{`M_8cfO@Vx!g1g2mt7y0RW7@`4%32UI?$XgQpjdgR89@SY1U<0j8^9 zs-qS^2O((tZV)4~uZsNU?&QYG_gLstqE)H~ZWM#7T#Th|?QNSt$ihBILH6{TRsfn4 zUMHB)+=Jiw5fvjIH>gyqJ?3Cm!PlO4Wp@j_^w{CX*GmrXirCp@>HDUzcdCy>BSAJ@ z2m1D&{RJ3Ky~{8M4e7GyxkME-m85k6BIP7oIzGptVE%Y8?F#QAbhJ-Mr5yC6#w>ShufJa)b5Q7!k5iICRB4OK$ki|_3LaZPVjQJwc_I#uw?3ZaIk*(=`3nkH*-A-T8z z)Tu8pU*g@aX z3Lq{B9>Sjl`c{R}Rre7QNU8CZVW-%tmk*j?s}C>^3ukbrLV7!8S9Vh`-w{r+)Axv{ zl91-V4~H3$pEM2?eP)T_DdfQN&KHEUH$wRr)=lIZkws-_*nI@`3Ei?xv4+tfB^Y=% zr6sHYQ!lqf-T8Wi?WL3Dl^-@vcr@}?>iDrjM9C1bqn_?-vgQsW8)6|b6<0dyy4V(D zwC4+GAAMNZm13ydCzT&2BtKzINE)1MnzLqShQUPe#m zy}py%Je1^S#9XG(oRU82yqdOW`h0=XDLK#E7`IzXmxQG%QT|n7wqyIVv=nM_=`rXm zuq%<{WYP0r&7{pHlSH0)iz)eKv}-u(jM7%24Oh!V6G=78BY~*6#enUbu@CZbTt9ZY zC*rg4rh!i1OszPZXp(KeJ0bZ?*hte$>kg(Is}K$4kza}mO!d_JUe-8G3x=BUbbLw+ zD`5*O4>wsc@X=K|q9O~*^cgv%o+es+2j74mIIp?Gy!MH{;u~z^mAunkzS{Q>%Z=VG zCoF_NXbJFid9NR<*s$2X za}O;bJ&&7S#{fM~`f#>BM6Eb`%VdZ2AmRfZp?|QHy zYZ)*v2HtI@-|6E=vi8jj1^4h|x5%={8pxUX0u9)5>q9lk8wLfBmJh8LkqPCL>OYOP za2?Ii9E(C!7@)aScF1uxIjNH_`Vj)lA~NK2416Qop{UhLuWCl+?7)ogo%~#@%qnB+ zqc{s=rYszz}^_i;Fw zBtYI+q}T>LDcxQ2NehvL_iEU)G|yYDMkbO0l)$m8BoZKs<1}|!>en06V{eA~#}hgX zNP0wBM0q~%AxFQ^i+v;awMCI+qM_{$c2Z<^r8p?s&T#ILI*_kK`&jk*(hOwnu*#HD zGQ_1qEy{tXDxKg9OAR2{qNi7@z*T#+M~K#z6<$y9+%?5-8?5-X`Ks7q^Qwbo}t>Zi8Atb){K?}nMI`3R;&w}EH` zQ4>2_7?>uZl-B|+JtV6If^L}{{H*eMBy#6(wUQa}yZYj(!NVTGUn5}{iJsLqS0>((wl*z^Z0=5Gs%}(Hb}`6w>(AAcvcl;mjj}^)%Q)%+=C@2vY{!J+@Fe zU(@?qBj?A>QM+{0dC$hp>_!PRc^AJJHepEO$>@?vGKoqtY!j?04j?LAzNEC^#9EXi zTy`1#zJI)_^AZ%)Mh%|9{BkqvNp{4W*VZnJ0qX4mm3(OE7>SKFHUJs>X;S*`0e0hr z)-rh9h#M^1K(h!GQdG0*2tsKnieLBnE<<||mL1pEGv~|!f$GxGOm`a#Vg-GfCe)DC1-#d1h?p3rJiKIA}23%F`nQp-_#BA zHj1-0qnyW9L@Xezq!&}}Eggl2#cRo@)RkF_VVNF5lJ>>Y!qSB0)O2D1t8G}dk`LUW z7MYc$5KZg0|9wBdGu9Qt9@^NvJ|+W+E!3!kX5J(-F}O`j8&^%NqEOErflRckvQ!D4 zpDrIP_W*_J75Yy*O~zS22Z`xnV^@)F0`1-0^I-J2mET;_?cWa?75Y698U|F)4+1)` zv~of|&dcWRr5iD`V!fVG#lv-ri4gF+eeUULk~1=gO*hVN!PBLlA5lOf4-D*jhhF;p zjlO{$2-}O8to4hRb!+88M3)oO@WCu@gp?pDaPIO}9oF&e8P4>@GGOxafZ*ez5{@V0^ z9wi3K3JU5tqskh?NWA}KL^V2)NrB}J56xjU9#!ssaJ7!IYCkt`6d033XACkX0G$>< zu5VxujDnHp+|zjMJgUR}>eV{z9Gn^(8`vu9^*Zbb!Rg__QMDBadD^&f(JCXIaGL7e}`()Zz~(3002UF zUXbC>P#pz1O&$5CL(M*+5CCATb%}^O;$nIsBYNLqelSKKeol={#^ zX=K+kXh^i;`hxon+dz$T&%xu_EiG-SgcQ)+{XP0l=EpbRtU20U5KAP8z!~ov!9q#? zz|V>YDALC+fd{$AdIw>EYZVLn_HIF%S3>n|u&N4o?|?9`uOh}&ve?^3jPZ4T@#_)r&=!KQ~b zv{i6j_RT%iIVG<*hyR#z3FpBS66#Pq`Ya$`JeAh+X;YA?NtqFl=WXfsw+9K#3W45k z)G@6})S1uoJnrrAdZcawFy#(*C0uga-k~Pn>o0F2*{SztAt#@EoBG;lTHk7F07Ns2zzpj_w)uPT7O_g)nBf`_Lv&*j5C4NDtoLL1M@;wu#xserIo0Duo6>c3NdMc###zmopfh2d`!+P#0)^S`eT zzw>_gZGZEIasJ5ruZ#N~_`7=l29gu~5%_N{|4#c|Y=6`0|4jRb^nR!QuD1VDYf*5C c|5)RHw5O$pe&=)n0L;6^d?(m9f3*wXUr--5%>V!Z literal 0 HcmV?d00001 diff --git a/dist/vttformatter-2.0.tar.gz b/dist/vttformatter-2.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f8486a5ec6558e78e173b86bd723b4d48149ed69 GIT binary patch literal 4892 zcmb7`Ra+B)0zheyR60evyF=*`1V>2?hNBrB(hMXehqUBqknS2Gh?I19NsLYjw|oD= z{qFajhx2mY4|5y=K`u2L5DRVN=;dwU>+Nml=ILVX?QQERASx&<=xyzZaU%cdwIVrv zpCxwB?Ec%E9jKOFyq{3+Zd#TRvlCnga7pFEUU zlT?pYRQV^mtufEei(0KK^*%rynbbn0U%v(}P08jtr?<)gMTAV2j?-ll?Sc7NiKf#_Lu~_di5_$;c56H(I55OX}sI3jb}z(d)cuF#yCCf z4#M&t(&mnfe0i3MHyFmzFt3HAqL~a29CY$Si=JvxNA%f7&qQd<@T^4O#pO1g=>X;) zqwLfT&(nffbKl|smFM*0P~j=Bdb4#6eRRS&G+%o+0he{Yd8=4qa_DstnWT+$1#%!g zV1wSY^esL`>1}j(iL7ZRo6nm}hCiSN+VG?ib0W{1y!ja)cx9-b8*%f6=ES#uF-LGF z?IOp6Zfj=Lbv#VwJ5r*wL*(OCJ0Xcq^Mty#k38MoT#1CmKL{9;KXU<6WsdLS>jZ0=S z9qf8E%PMvf`UwjKmjXmg#e|0$)Sr5UgK}?fR|vT@$KU@vess$UM4o;Rk2vE!i5NrT=QC z7Reb!YF3(f=n20a8V!?e{r={l+O-ZhmzcVHk2E3JMVfa?dS1QAWBr)HZp9x7T!@q96KR>!J?CjY?<^PO9KA2YRvFEIAEG3 z!tV<|1f;r)Yp#szZwUfJb|^D1qyigRtwFiDs}ibNa_djtDk@!XfXh|Vpe-H>(opK? zUZwF*AGpdwYr(sB9#BGpUBZZ8oU=I3*QevwAlp3PUmTT4qetx+{?}8GP~>kpk}vM3 z%JRF?IW_99H}y%C?hdGn0Or@24NBpIn$*oyd7q78cZic7AxTTT^(YI(GD3D@|BqX+ z`DdBj{{BZQ96vKv-(|=5HoZC40z*^y*Qg5YYKzPx60a5@0s}xE#~!lx5kq%V2m6mO z;mmVGG)P0?=T+w@dmp)P2kWX%4E6F6q>O~pJTA(g%DzT)x@p>Np`}@u;iHZAVt?WR zBlSCTTP9DoX%dTSlw_0gOFlL^EK1<>huO;33?b}~GN83DId5TW6lJ}vs>mpR%m$mF zwg>yffDW=|yc7C!D%+GS@=1>qsT6I;Eg;5~6We*Kpbmv8Ul2iem|^^=pa>;w|Lfh; z{p0Fsp^R-oZLICHk2I>Wu7!>yE-!v4$##Rtu+F_y*OF-u_Ok2hR7N{1zW;zrjUG-M zx^9vV$N@kTJLQyb(!ihHwv7mg=I(^Ab`BZ*2ns@UY^R<9PlP%+`XU@|R!_*m z(R`~^(LgF&;^5i1YBGQT=gMYJxU=|BQkq6+z+wfuJLLAdbAO^~e7o={^a13M?f6gc zjDd3=jc2DfkA8{8>%*t!|BE*bE1EZY2}tB2 z;|Z)%{-Sw)q`{XFLb;czoYc^YD6%mHpTwlGCc+sF8UVM(PN_c)o6^JI%BcUhKyU{- z3ldJaIcrpI*EtY7|I?V#zNNo`AS^hJ~3n{ONg2Y=Zu$Xt?yF)Fp&r?Ki@FVFq6gcw!6^*DaQES@OWI^orQOxOs&O+j7K3WD#@9&#>i~^@4W=XJTzRunG|*{ zQ4D4g@8%~9Op}boPCQ6_VI--Xoiz4_OwU*Tv`ScpX@S)W<_o9Ft)XR~qV3I>*VNF1 zAJE}tJulprj|^x^kz*ANUu{dPc+08k@rx`rBqRj0<5mlE^JoQyw2GAVDL#{3M`h@* zZXloTFc_oc(f=MtL-7g=na1s`sL2@P(c>YUq1b2Oag#iB$a8L4&-3L1OaVuMl7D*h zp%+8@D^TpSP~yk#RTKk=$eUsVBL#`RGec{|pZ#RWKa*|?2^b}sse&qH+RBI%Ycu>Z zc$t=)lk=S8bq0Qt7NU>8L-jiQBBkLckxSbK>=?4xw#VnIKAqseUlb70pIjj*NPzbl z&O9HY7S`ymkbP#VBJ@J+4TsrDNz%Ri&MwQomWdDMfnQ)HJUZGgWUz8TC8R}65O-EQ(b ze+9qr%-X^ZD_>;7y(CvYsuL{O}e_83jx(OrH z?GS$4eL<}IRU|$kLuZ`5(k8BL3YJqeq!kb%3mNA4MiNwE<>;UYe_pn(dpToVM#_gU z7_4tLFOaf?(!(66*>l!x+$OQ-uUMW}fgHZ1?oLzZd=r2WiG84lAZ=0N18`sNzm6J^ zua}MdPI-UMxaIcD^`&M9nIgRz`v;=Rjpvh8Q!J=sI={E6tsU>?zCUp}dgbQ+$#F+x zrC;;DiW8bTK{<+*pLmi-K4OH$kL~IZRUYlz;3iMgS;Lb3HMq#KWqQ4uZ?@5*Bu_eT zpJLT<$;jk4$igx;P7%WUo=ea(eaq_wvzG?s)AM~N>$tUJu=+8_ zmZbUQr=yP+k{X2(`7*CZ+IeuwbCuk9BmYm^^A_(N2OBz<~*u4?J|wp-2w9gsa`7holsB7B@VfSr!B=1C2OfGeq?+1L#TN#YFh=z3jyU8*g4dTB9 z%j6o$SWO^hg*4{&=7uiHZWT(MZX3}RSM_&1XqU|VULECyE6&pSI*yVP?sWP61OxT1 z;6M~mfF|BdklhY8Tg8ldGxNz*Uw!jF@UiM=u3v|g=VDQ|goVz#5Fp6XW|P5ndotza zeCZD`t6XBr3WI!}*KB$7&%|%s;&Q7ZF=jRtUzdooM@KvY`Tea1y@t`|uHkhg`9T8&$DJB8K&(4BOekkwbhs&b!ut-zde!2wP@q`7&e-nn~Ndo4+-d0*m@S=Of7b?__~aS zFpDNT4mC>qKmevnSYdB!1TX*7b|Lw-jgi~+mj2nY6~ih;aGQ9??OcrzFG;3gCs_uk zcnyySdo9(|7D^r?t=?Zankk_Q7bInOyeg73bNxHWe%5Yik&-jRi?rK-;TMT7KNA zw51ja%kvz(+91}X64`PzVooy1OU>->|LrqZ2+ADSM~o#Y6J6<>wfSJxa&ZBCB=&op zjB%ivzoor4VVmCl{0AkBITTO%diEdQ(bK#jo|kDT0W?KuWoB?(cIBnzo!8zuTHCIa zY)>7@WW*bYLf7bV2D1&w?UhXoDD2YLi7S`Rwp)5j1gj8~?Az>=_bwQDRfUp3?oLW7 zNu7i~{ejcJEu6-`_RZyYPT${|QSyw!7!=|&B0z(ibVSsqY?=5P*^HJ;P@d4!t|7SClyoSCM^FBVSVEOFdpT=^+U9etqcjx3_Avvbc;3 zoCZ*~qm#^0kY@c0_rHfUZV4MFPYdoCt-JTN3W-lW2UP{1(5QWfaT(y2S?zKDZMgxVuZx!QEW~3=nM41O|5q!I=aP2_AyGOK{iV?(Uj|NA9ir zPEP9GclNHXuKwp&U+u5g?$t|O2>}rw005u>;2ic;=cJhHh#yykkK@Fy48c6GFH zcei%sFf+4rvU4{xV~2RXiRDmYV;&wHc@wKwrlT$}&cmy|y$KIc|6f|^BTbwdX3>fQ z0Gv|*0OWtten&qNGYlM^*ZFaOwzU03o|b$@vR_bM+jbem1N5Cmg1@MdNIUX9A$?*w z#!nWhslXhAv(dVYp`ef?(|TZ69?0rqdNX*E@~rE;A}%$8nFA|?nYzE$xz1O&U`w~6 z*7;;v_>*G%h@n5Lm1;Hk?COvV{+lf?_0^GI4+LN*i_Ma=OQRKnmLS?bk2yk|I>DLx zregero^D@Cl&g?nHFYOQy{ysYrDnp84MXj=w+$`dNXj@bM>5Vf6JDybU?Ljr`5J1@ zsJ;8XT)ob)UXyz42O*mBaaSMvCjQnpZ^ssdQT;?Ym*YbgB0Y3-qaBJ^Y3Pt_Zn}Yl zp<%mmzRxJ@XA+W>YNkU)Crts$myZ4XR@G}k`%}#HQPyb+`%^P+1ZH$YXBjYEP?lAvyt7II{Iu^)TcS3{!euLE)o1 zv2OkK*rgK|`<5Eu={I~TxE*4X<7k$M?NL`Z5+0^0{5XU6r`QFu*rBj5_)ybITur=8 zKW3_MmUzFoia4=S+RZ3Tx$0o51>)WAt2JVLvaR@0GXd~B?{{AjK6s(z+aYnLTp?Q_IhhC za6o3T(TNm*&Sj~lZ97g%eZm-y^zOp7D|GTJV)8ZjtlCsLLl{fo1O^U#g1J{AR%W>; z)gIqOXh3U|Y4L}v=^W&;u#ZNA-qL(f;QkF~GiLx9Bb70)jm^AH!ZSvaq%ZWspEWgGFiE%XA zc1@W(?&e*71eXR~gA|via44FD@O&2f)2ZpxdYB;M(CfTrhrcyXA&9 zKV6``+M+DgkVrG~P_=`1Xj!{8c=}Hc3@gQnUIgnn4?VG(sGQ$&kWyONp3K82o^sz@>hcjiV$=06=eu@ZHl7ML zY+%4?c>pR#n)1;kN@s%ba1U3p1PyZ94$ce4( zr^&Jvk+1h2y62t$c|-6Xu>Xa%=&(e(DomSgo>WBtkWn3B)|Cn%-;pGih(?++=qXpj z-x=f&Afmu?#d@10abY`T88*V;cbRk3Y5hdl%PKN=cXQZr#J+$b3Nr5r>8==Z0#a@(vYHJK z$Cgxev)S>D(xoYQF+L8oty1P#oJYf-~K{P`>yL4ix`@{NQFfD_*dp`mh8;+@CBH;F*Y?7D}B?@?O5$>nJKJR zMg>RL=XLi05roYwN!okcX76_O=5x;w2uwV0 zC_T8=?!R=A_1$*NK-!V7+&^}tTNILjWI>FOLl8}h^^oJ-&h*+r(fv(RJT1g@7&mk% z9(bR^8*A(TnFJzB;x z6R>i79Mb?yiDWw>ZM+XYQozYtzEA=%JV=eztxw!|Pm60*H3|XAx{EbMB${eTs%9%J z_vJ41xWcU0Xvs2dr1bhtlF#!)m1h);+*H{(jgqHU0=JtwKG_x!<&gu?=FZo^9m^^z zTu@VG1|Jeeo2QAFQog2x=I%Sjxf9c~Cz%0XvxHIE99ZM)kh_AGPjnFi)=g(lVR0iz z^2KoMa^%H!pK=zFDK9}1A{r5FezuGuLA-mcCQbTzcgQ1Pg@!vgAzV=BH~vG?rft3@ z{*dFmcbk@N`fm3zGw<6l%WUZp(Tkh0M(yTYM& z@)p%mUE$t^(wGr7;c@#*eooaA@MJ;K`goJPz6!_`wbB-yo7nU_k4}R%pk7^by)K!I z2`VIRc7px@q^bon(Nv0^Y;@v zaPg23Bz1mA!NZ>sTFo2R*bQv&KzI_1;a@1;8htbadTmRwy1&h_Y;O1CwYxOtEuMqp z^8J>82h3Bn9y^_~4Qc1V_L>7izW;2FKd{r5?FNC@M2x}_eoR`dsckj}4*C1@X(vc0 z&P&_4crJ&xPuCFrBffc8%`8ei4RSVq8!&ped6q1Np8t zvGUw{xp9m+@ zORet1Zi;cH7^%8NVE6>tUqV&&h|lT=b7$$I*>adrJhFKqjP?B7iyOw$^<%k3h-kgo zwQ*fiG;j6abcjA<+maBq1WZM33%PK03)o5|%E;@~Pq@}|muY&@a|#g^yd80KS(h@i z8{QNSj4r*^RMtYX7^S#eMC|aSW0Z>~Z=ICaiA&TbkhOgOLBnwgTw8&Q3SncJ>y!G- z=be+W7LlqBDZ>^Zo)hp9M6XeAD*Q@DZ56bQqC0l7aaMgF`2*?iJ9+v;Gv(bA06-QC z0HFPcovZ@VmXwjymi(e;?>sL{_+!|NE24EWK;e`w62D<_YpT1k5U)x#D6~8xtTUUJ zug}6JOmos~nMZm&p+S}m!Je2`EEYP_e7%6|zZM3YRDYn+Gwk&Qtx=sh*>>t#*(lTL zNWWd}7g@w=HoMEq-YcWdGHtzi4ONP!4fE#;>GSMK7JHE8UpU@*;#$+~?#-9x7&3bW zYO~^*hZOP$9ebH5d5Q-|jL7MXKr549g8baW4HcK3Y)n907tXz;LCw@@AAqtX%S4k7 z2>n4nm!K@f&V0NN?!NoY3HLRystfV&->>@g!P@0tW=>P8g*mm6(w3XA`8;9_;;{rC zTD-1zyP;itS|52JaSxg`VYS_@V(t>=&D_DCkCfU@{I*&zrCbhMap#KV=LHtq&)IwB zEdAbVNh)0 z7;-;FIpH7H2%j`vrgYg;LsHz4Z|b`wJ!6U2gh3EHB{bC=qG9h&rEz-}FHt7Ld<39l z@t$E-x_l@f-Te?Dq!?Vn+%GsL))}Sac3nzwZE^9Iw<+%IXX~;u&Yj21>AXu1hhXpM z3+su_Q3)4ADNCjlU`YC@XINPqS*2K+AK~3OBh^f5UJETbTsTF@`{~O-02du52!fm* z%=T);|CqoZ@F~`o9$)ywRRqm&7hTh}wAnXK6|yNFGJi)9*7(n!a^3AL%$AC6If(zb zpLL*00CN0@Q4~JQF8&+ED@YBwkAE%E^%}~pJ(H7j2FI;xin!DzCBOj6!QQ*iVQxIs z=P`c?zA6#8kv#Hw)*l3Gy8JUStis9p%5tIXmYzz3XaSSexl<*qkQQ0>wlM= zwP~=Y=b$l8Yon05nb?4_xu$Fa^ipSV_#lxb$H)GO7SnQu7`#gz+|8Xo=2M>I_Z6L( znEPnkK0F1yxbc`If-)YP?R@WW6akR#i{p5mP~78vQ0U+P0B?IMx?ATo4E?}`oSim4=-4* zbA5vZ@>|{`j1itLUI+}%nvw0T?zX!mjiI?O9T^( z(xZe9mEgAKl-*JUa#rKK@jk2VToH*9BV5#G()e!+xq{=b-z%6oDX-q@Y11oN5F10dv#b ze7y5eg+7*x$Z|#%_rQPP>ypih~2jFuZ^e?DUunnB)vHYHtJH z;@t%#*Gl?miE_z|(&bgSDq@=mLaRRr5uD!veivgLcTTnL@YQ1wYPw#HH2t&`OD|?= z+GSG^BwUIa{KQt%4b2K6jxyA1f=KPvDkI3u6q9Em!oghKS6@iKSTx-GW$;K}9N+zy zHe){adDOTQO=81~OZ1>uqsomb^w7t{|f=_p=ReO3B2VK!Vxm2x0Yz~bi7J6OCvjF5!?V+)O7 z3CAnW5<14KVQRnV=4x**sHcWiSN4sIA6Px*r$1>Z+_%Ftn7Tt6AL4Xjn51az;o(sw zHIMp!54z^yGQr12OPWle;mXF3MMn|;9Cj1X^sX+_Ir!%cx&u!TBBSQ_cGnx%3W_lV z_(hj6HAe6eLev)4Pzr|m_T;RO&642uD%=|T;{lnR* zCl3NCqmRg|42@y_J6WsLqJKQesnN?c=D>lvnIyWsl@QX3`wx9OdDk<{8`A zMC%{n7`tSh#$;qu9O_f9&}3v{R@vM{Q&6teWGvvF9vT=?no=43ys^3Vw|MSXeO!e> zt&hh#0P}G${X>23&JZ&PYfozjcK6rrEG$e4104M-tV{}QYrltnzo=_P3HAnX0D$nL z|E2y1swPNUO;g5qw9zw&6A-Z4Jd(hO>PjYz7(oSMi~u!4B!WIJghaf{Ki&}Xbou_y z`9^N|O--rRuz|QrLIsmof^vfhF^~zip+>uXufj7d#Yec>6|*JYJl7-FG-m}z1k#E$ z$OkSWI5D=}So&<5u7^6P+K1qS`EXfdvD#;TF8S>aWMXW;D5a9pTs{k{1;IL@Bb(MOVNMg{*>HbxWj+I{Y`*>V*k|Kf3Q_> i(vRK$y$gSS$bTzPT?y&Y;{yQ5k1x}sZWjoCt^NmkWd~OP literal 0 HcmV?d00001 diff --git a/dist/vttformatter-2.10.tar.gz b/dist/vttformatter-2.10.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4e4eb9d14fccace80e4a540c66eda94258e6dd3 GIT binary patch literal 5219 zcmaKw)msw|poT|CH%P}2kP;Xn-6bI)4HDAbj1pngC;?@J5`x4iLAtxUyAd`z2O_=m zyEvEUId|{#z29H(vV6wFgC25NV504ueZl7bV6cOykGnM(Z12M-#4iZq2V469PGnqt z3sf6!-=?9JDvR}CqL^48TNhfT)L+J(IWWJjoZ}nhU~?Mo!)ua0p4J|RZ2Gn^IFYHL z`n+qz!1gkDEjqcfXR;wyo+);8%C5ZXPKBA7dAXl1wzOhIGu|$BPL-i+Tx~%@3EhEq zhXn%QUPhuTfua4+Z=9U7O~_eEYt(|g(HtlKpV5_1f7?GkZMS|S9YtRRu603rX|o)l z=*T>Y+lYNWH01I+`(?TjG~Gp~yqr*L0L#!*E>5b^O#68q-SNVjTnT)dNo;LyNxYd3IZtPv;Z$v6@-19u zqN<5BoKzNgYI=@Ler2J{>WX-`F~uo7!qR1l>OI5s0aKXF?`2KjDayT*(kfA!q1~g9 zvB(`KU^inE$~n@fQ1%ian3;bKjBT27a$ZjDDN;M8>~AnOyS zi|JHx>C4xMkiDkaH#$FHD_f=zA1dy6)`B#l3rJTWK#+=y@t>|SG@A6~*(4T>G#ZFz z@VqnhjXLB$N8krtV<6@V5No8f2!wrRmt0}T8RbT)kx1q5couV+!9Wh8@xE5MIB3}{T0kVHo?Gi0_@(lZuy7QfmVgoDSe46!8c_ZhT zZ72FWE9odXEL)qTBULTF_3u;ticpdw$-)%4pEAG9hqtSeMP;rE7hKxe|DUYN^vcV@+%umm}#5#_nTcpj67us6FT_VW|JE7vh{4 zb2umZE$pANu_GZ9d$&kMatGrpcSl0c*(rxeiVZAx@s#tY4=xrg8mkB3A_jT*#TYF^ z?Ry0%e>90+k#+_0EtB>)Hb3iCrLSh|Ka*ZFNJJv3la$=0x&MgIzf5tSK5?A_%Qdkb z5yKYoyQRY9YP*u4IrSLAdSIQ_y+w1amss5oL0;{7@e89!z8wEVg zjixa(xZpF-cZ+*`VUblyiu-{~?4UH7wSMg1e+0Cj`vQ*7=(mM-LGcdabBfNF-fxruP%l)jM_iN6}bVg!z1;K{bL8NWxdHgEsv3!}9GZ#T)u zvd4jEE10?8SWSKzxc z9^`~neo1;kz1$>=8*9e{<+ov?vmSaDfAnZeOE)}2IZiD0l)nhX+aqgK0QaA}q+$&q zY3(Lgwr~cKwgC67pKVf#&d@!eA%X|o%iDY@M6iI^p5L7hs&4S2Y@shdz@|E*vD%6w zIxbuU4gn-$-fv``B;OY^U!kl^8|PN9GB96oGFEfgxNQl(jxBt{XM!_$7c6$mYO$8f ziK(%6bV-~bpQ-t)QNq1z$)A3KqtYB+s6Ko4+hWNxHmS+@#hf>#D~86bq4SqD6yGZs z*QU6W?ekiL7cGBh^`c?Hx zH}j+`6YLa8cD+rfECtx&H?c^eI=6>yHB~dM8C|`*c6j#mY=d|%JdiVPO(iq6r`X5s z+^;2GLAMv+i~ouhkb~Lyh5WIzF0N%y^G~1!I{H5JNVK+(+O+VOY@mD59i!0QmYZ)7 z^B^~e=QPvo{*%z@w%3w^&t!}#t^f(GT`Z*=I}xEg=641~8816fYLHCzZ`x(+ezB;E zB=@=s6+ky`gqPFtZy#JDoA=^YiERW{g_KLqe@X>^1Mon)%9m+a?X~lfWEeQ!YVH9C zD}ltZ{rn?;)?fc6C#4clCT~;1{`6)E88ylaB9HCo4*IwKLBR6qDnFH1FL@B88q$i- zTGoF>d{eoPGk%eb3f_Z!kPE&$xpZ7v2`YT3Bnc+ULC0v{^??M@Qqho=hcmW1jRiej zX5lxbT<)r#)4#Q8DIeo~)ChEzGn(~LpX5&x(yp@e8DRCGiSDqw1u1T0^H|2_Is+YiX>KS&}#2BgDY+)|WgBO9O6YvfjYM6|el49))vm8SQ( z$1vjI$bes{-?lyIHst%j)O4RZ3!_rs3}NRP-`OHUXgQIuzE~m7QfIAu=2N@>!2ZQ( z&VuTAvLNu5J;xu+?3ejmU(y_{#UcJi&_s{1JVtF1j?rkYk=|X|rt<2yxVd-{&BvDd zD9qf*KjoT+AZ279w%C2}ELr@_OQz7(!bsHlJ^K4g=aS{odD|1TUx+~kI%3OV?G(*Y zU@x?L5S$e$NDfCbAXj zdDwi%LSps^Nf?OW89=e)0ll!!B0RpY>r!3UF;ZUcT{g##?D~R@&T`vBeV3h27JiC$ixEUiiwF4~f2%G@Jozf1ZmgzKImt;*fpy*=>E=&P zGb!j6^__BYxW9ny^xOp3zx9W?fzL}6{Ric{+gr1*^*RJ1FFV#dpVB_-SN^s5SA~XK z9GzV~4XWkRp4_uQ08f*wqTrO<2jhOM;2nr>_60;7gxiIV!~bH2M&oJuBev>wF(uv0 zH&-BeXw;X<28`ehbSBY2bgX6`|r!gXJX27ReNxa61u)Ji;kh*4(#doH4AM^o@ch=7Xea_ zWvS;Xfu44DK;fgcspA}Y=mSr^JNh2Hn;&9nA&%jbZ1*bjdQl`nfHPxF4omR@q! zO^Q{W2wBx+(nt?{U{-9?UgJ1B#x+66pGL&Ok5EJ|(a*kew}SU!b{qAC_k5nq0#%3V zCEIL#5TEn_7>2j#_yu{4VMN$i5I%;Vx#@fF{3s+@4g+V?r}CUwiK3u!y6BzTI0J9F zIE03_$@6jUCpyHh2*YsQNU%qTGZi?5_9QU&l12&S%bjMs(8Yo; z%Wbud#qy34e%m+H5~I^GO^%xm7El*;z^vGJp-ZKPb}Ch=7uey(?55O)r4zUKE)lLl zYOK=I?_-8lGv9tx$J?qw+Q7DEy8%LO8G&K*U6iA=@R{ehy@a9V^aZ8M5@#x$3>(eL zc`1maI2vr$?%q`QTYIo-Vl6G_O>;OBE)?#}7Y?V_PDh{Zd2xStr^l#oZRRqvpFn1( z^{0R}>;;+hnDk9OHpst?hC_3*z*5D7KQ^G@`7F(OV`FUw=@Njq|IW+cWEsqU{9r%- z+vx{zi(lTw=iy>)Vv#&Q%o3c1rH<2uZPUh|iWr-0^H3ZITcOb(M{Kp%hOaW?G7QO1 zI2<;-KYy2UG8OhL*5jI47WQPV&3qma;@>o6C1lT^if&9T<{sa7ny}XsC}%SLIC1UJ)nJf-)q_$j3Wpm zc8}BR`Os=8Z1!+N{z@U+7&sE0PG{)hzAU|e@(M(3hm61gu!8q-VT!=sFKHI1Sqgt> z!Z6e0<$=gc0@ZMm+P#qF;P-tK9Fae*FYeg2EQ&uwy*2HE(oJfxzxxg>Eor8Z7p!Kt z_{pfOvq!YY!P{KeV;?84p;}G*nCV?eP-%s6@PY(jS`$5kBC7%=32OHxxIezt>+=&3 z)8FzjLJ2{klccT^0`|h4RW^s`LOL0xx%Q7o?^^b$j!(>U)Wc(gn!LrR_IskmY7t(?Ue_pCI*~M zbXIUeb#(}=LF(;X6l5yx(fsy!3Qd$@E^A+Eq_7e!Y~Cx4j)l}Y6TBXVKkuWclSk$^==D&1%>}+(&uL;9^BUSq86t+ z`FsM#9}7r#Q=t(L$OLfcwUnVNMvr;}(GUg}1C1_0D$htelLZwJQ4*d4N#cwn(zb0o zPdx7^Zz}P^e0^VCQ#lX;e^pcB!xx~0G>?m}jCh_z|EE9W=A5Sl!4LFqiLFi`+@e~A z$L_*CLYCVH?Nq$)J1nd847Wi1j2=MWkTq^LRnfFfSXms#5{HA~uEZKYKM7{ITjGNS zu2GI5Zr$N&zXMpn@aW&Sgv(W_iU0@-A1g8vjtkcKbR(#+K>+l zp2LeXsDLkSe|OfTQ3Zvqg!DKcoOJV137-IV${#73LTSR)S7xQYYYd$$aT!o25J0gD z$eqmNlr=FezuJD57#H>x2eEyy-95R6o$KLX;0*!4x3-c-ajMxno4&QUuQ}XS%u>;A z>w9+9m|t>Lb`~QnH@7r?`Yduh?|PJ|)6Ir!EOq`m`pS=-Y%q) zkDOH>0&N{YJ^jq)_L;mYBlH^H^m1y9(${(Yi%z8@%h$ShAAa%rU0V$uV#Y`7grQHY zh-$r0+qG$vg20%+ExYYiT@*(((y*7SLh!jj1+tIQ#F~fxv)L4rheRDI&Bx8h5vz&g zUg&gyfa{v6^T=CWIrHyRt^AdG1V`e*0bi9ZB}?_)z{yRxDlxgM71@BE>z*Vr>>z6H z>TEApqm|ww2l@CAFE;+rE}l>4KQVaSc4`i5>MChw>3Qs*uqI`sR=+eaA+Kad_s30d z(!9S*FT73v5UL`svxQmi^w{LSdmQPH7m56tz_ED2&X!`Ta96g^%;i8QTT{=4y;|F~ zp9LS6`Zcs`^vjO;ki7tdHIK%n`{jK3;j;({TlNUY?Cl6UP`TseZk#=z;U9>ML{;7ftJ(&sf%Qf0u+Yaa9>9fGT zQ0zU3NM%iVE;-?L2=j!WKXa2tnz)^-U{9hv1O1!nrE3W^$+1W8X~%eFXh|pZ< zY^>d!l)YV+JFSNv|0_dJ?YqFYS1~v{8$`B4|3Y@A1E&_ykI6S^*A9XZ^qHK_z|QqO z%a@szGr7i1h(~9`Oenhf@yYl8aqs$m^5kFc>i-J#+W#%&`mY@Co_s@|{*8|YRCWek z;}m?wg`k5$bg7q}MY4?bx0`RAZyn#K9=FPA%;Ex4Ej)2}TfY#ew(HGL_2vaq5v_KP zGm2u0T_pWp-c#}iV-QL^Rw{i;XGCXAq7QG*tu`T3iSbWaJK@uZXxqALk+ylX($xpi z-=t5EG$H2G|8=x_G`jQ4%u)=?d$y@VH1i|+b@Hs`qnN7UBMWT>8UlW5bolA+?rOHW Yve3{_U-ADVdg7Wzuo`6u_z3{~4;R`|tN;K2 literal 0 HcmV?d00001 diff --git a/vttformatter.egg-info/PKG-INFO b/vttformatter.egg-info/PKG-INFO index d310127..ce55b2b 100644 --- a/vttformatter.egg-info/PKG-INFO +++ b/vttformatter.egg-info/PKG-INFO @@ -1,16 +1,20 @@ Metadata-Version: 2.1 Name: vttformatter -Version: 1.3 +Version: 2.10 Summary: WEBVTT to text converter Home-page: https://github.com/georgiewellock/VTT_formatter Author: Georgina L. Wellock Author-email: g.l.wellock@bath.ac.uk License: MIT -Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/1.03.tar.gz +Download-URL: https://github.com/georgiewellock/VTT_formatter/archive/2.10.tar.gz Description: # VttFormatter Converts WEBVTT files into text removing timestamps and identifiers and formatting the text into paragraphs. + `VTT_formatter` is a python package that can be executed using python in the command line or through an interface such as a [Jupyter Notebook](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html) either locally on a machine or using [Azure Notebooks](https://notebooks.azure.com/#). + + Full instructions on using `VTT_formatter` in a Jupyter Notebook, on either Azure Notebooks, or locally using Anaconda can be found on the [wiki](https://github.com/georgiewellock/VTT_formatter/wiki/VTT_formatter-using-Jupyter-Notebooks). + ## Example Input/Output ### Input @@ -76,6 +80,13 @@ Description: # VttFormatter it is crackling. It will still be recording the audio. ``` + ## Simple useage + + The screenshot belows shows the simple implementation of the VTT formatter in a jupyter notebook. This will read in the file defined and create a new `.txt` file in the same directory as the original. + + + Further information can be found in the notebook [here](https://github.com/georgiewellock/VTT_formatter/blob/master/VTT_formatter.ipynb) + ## Installation The simplest way to install this vttformatter is to use `pip` to install from [PyPI](https://pypi.org/project/vttformatter/) diff --git a/vttformatter/vttformatter.py b/vttformatter/vttformatter.py index 8f68928..8b06dba 100644 --- a/vttformatter/vttformatter.py +++ b/vttformatter/vttformatter.py @@ -126,7 +126,6 @@ def format_text(self): else: sentence = '' while x[0,i+1] == x[1,i] and i+1 < len(part_messages)-2: - print(i, flush = True) sentence = sentence + x[2,i] + ' ' i+=1 sentence = sentence + x[2,i] @@ -136,7 +135,15 @@ def format_text(self): #check the last 2 elements of the partial message list and append them to full_messages if x[0,-1] == x[1,-2]: end = x[2,-2] + ' ' + x[2,-1] - full_messages.append(end) + if x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + end + else: + full_messages.append(end) + + elif x[1,-2] == x[1,-3]: + full_messages[-1] = full_messages[-1] + ' ' + x[2,-2] + full_messages.append(x[2,-1]) + else: full_messages.append(x[2,-2]) full_messages.append(x[2,-1])