From e507077d6beb1652e787c2be8a093e11e9e9cba9 Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Tue, 24 Jun 2014 14:43:45 +0200 Subject: [PATCH] cleaning up --- pympi/EafIO.py | 19 ++-- pympi/Elan.py | 295 ++++++++++++++++++++++++------------------------- pympi/Praat.py | 98 ++++++++-------- 3 files changed, 205 insertions(+), 207 deletions(-) diff --git a/pympi/EafIO.py b/pympi/EafIO.py index 4e0a9b6..c1fa3b6 100644 --- a/pympi/EafIO.py +++ b/pympi/EafIO.py @@ -2,14 +2,13 @@ from xml.etree import ElementTree import sys -import warnings def parseEaf(filePath, eafObj): """ - Parse an elan file
-
- filePath -- Filepath to parse from - for stdin
+ Parse an elan file + + filePath -- Filepath to parse from - for stdin eafObj -- Object to put the data in""" if filePath == "-": filePath = sys.stdin @@ -101,8 +100,8 @@ def parseEaf(filePath, eafObj): def indent(el, level=0): """ - Pretty prints the xml
-
+ Pretty prints the xml + level -- Level of indenting, only used internally""" i = '\n' + level*'\t' if len(el): @@ -121,10 +120,10 @@ def indent(el, level=0): def toEaf(filePath, eafObj, pretty=True): """ - Write an elan object to a file
-
- filePath -- Filpath to write to - for stdout
- eafObj -- The elan object
+ Write an elan object to a file + + filePath -- Filpath to write to - for stdout + eafObj -- The elan object pretty -- Use pretty indentation in xml""" rmNone = lambda x:\ dict((k, unicode(v)) for k, v in x.iteritems() if v is not None) diff --git a/pympi/Elan.py b/pympi/Elan.py index 2a0cb81..1e6bf08 100644 --- a/pympi/Elan.py +++ b/pympi/Elan.py @@ -8,39 +8,38 @@ class Eaf: """ Class to work with elan files -
- annotationDocument - Dict of all annotationdocument TAG entries.
- header - Dict of the header TAG entries.
- media_descriptors - List of all linked files: [{attrib}]
- properties - List of all properties: [(value, {attrib})]
- linked_file_descriptors - List of all secondary linked files: [{attrib}].
- timeslots - Timeslot data: {TimslotID -> time(ms)}
- tiers - Tier data: {TierName ->
- (alignedAnnotations, referenceAnnotations, attributes, ordinal)},
- alignedAnnotations: [{annotationId ->
- (beginTs, endTs, value, svg_ref)}]
- referenceAnnotations: [{annotationId ->
- (reference, value, previous, svg_ref)}]
- linguistic_types - Linguistic type data [{id -> attrib}]
- locales - List of locale data: [{attrib}]
- constraints - Constraint data: {stereotype -> description}
- controlled_vocabularies - Controlled vocabulary data: {id ->
- (description, entries, ext_ref)}
- entry: {description -> (attrib, value)}
- external refs - External refs [extref]
- extref: [id, type, value]
- lexicon_refs - Lexicon refs [{attribs}]
+ + annotationDocument - Dict of all annotationdocument TAG entries. + header - Dict of the header TAG entries. + media_descriptors - List of all linked files: [{attrib}] + properties - List of all properties: [(value, {attrib})] + linked_file_descriptors - List of all secondary linked files: [{attrib}]. + timeslots - Timeslot data: {TimslotID -> time(ms)} + tiers - Tier data: {TierName -> + (alignedAnnotations, referenceAnnotations, attributes, ordinal)}, + alignedAnnotations: [{annotationId -> + (beginTs, endTs, value, svg_ref)}] + referenceAnnotations: [{annotationId -> + (reference, value, previous, svg_ref)}] + linguistic_types - Linguistic type data [{id -> attrib}] + locales - List of locale data: [{attrib}] + constraints - Constraint data: {stereotype -> description} + controlled_vocabularies - Controlled vocabulary data: {id -> + (description, entries, ext_ref)} + entry: {description -> (attrib, value)} + external refs - External refs [extref] + extref: [id, type, value] + lexicon_refs - Lexicon refs [{attribs}] """ def __init__(self, filePath=None, author='Elan.py'): """ - Constructor, builds an elan object from file or an empty one
-
- filepath -- The path to load the file from
- author -- The author used in the xml tag
+ Constructor, builds an elan object from file or an empty one + + filepath -- The path to load the file from + author -- The author used in the xml tag """ self.naiveGenAnn, self.naiveGenTS = False, False - now = time.localtime() self.annotationDocument = { 'AUTHOR': author, 'DATE': time.strftime("%Y-%m-%dT%H:%M:%S%z"), @@ -84,18 +83,18 @@ def __init__(self, filePath=None, author='Elan.py'): def tofile(self, filePath, pretty=True): """ - Exports the eaf object to a file with or without pretty printing
-
- filePath -- The output file path - for stdout
+ Exports the eaf object to a file with or without pretty printing + + filePath -- The output file path - for stdout pretty -- Flag for pretty indented output""" EafIO.toEaf(filePath, self) def toTextGrid(self, filePath, excludedTiers=[], includedTiers=[]): """ - Convert the elan file to praat's TextGrid, returns 0 if succesfull
-
- filePath -- The output file path - for stdout
- excludedTiers -- Tiers to exclude
+ Convert the elan file to praat's TextGrid, returns 0 if succesfull + + filePath -- The output file path - for stdout + excludedTiers -- Tiers to exclude includedTiers -- Tiers to include if empty all tiers are included""" try: from pympi.Praat import TextGrid @@ -120,9 +119,9 @@ def toTextGrid(self, filePath, excludedTiers=[], includedTiers=[]): def extract(self, start, end): """ - Extracts a timeframe from the eaf file and returns it
-
- start -- Starting time
+ Extracts a timeframe from the eaf file and returns it + + start -- Starting time end -- Ending time""" from copy import deepcopy eafOut = deepcopy(self) @@ -143,30 +142,29 @@ def getLinkedFiles(self): def addLinkedFile(self, filePath, relpath=None, mimetype=None, time_origin=None, exfrom=None): """Adds the linked file to the object -
- filePath -- Path of the file to link
- relpath -- Relative filepath
- mimetype -- MIME-type, if none it tries to guess it
- time_origin -- Time origin for media files
+ + filePath -- Path of the file to link + relpath -- Relative filepath + mimetype -- MIME-type, if none it tries to guess it + time_origin -- Time origin for media files exfrom -- Extracted from""" if mimetype is None: mimes = {'wav': 'audio/x-wav', 'mpg': 'video/mpeg', 'mpeg': 'video/mpg', 'xml': 'text/xml'} mimetype = mimes[filePath.split('.')[-1]] self.media_descriptors.append({ - 'MEDIA_URL': filepath, 'RELATIVE_MEDIA_URL': relpath, + 'MEDIA_URL': filePath, 'RELATIVE_MEDIA_URL': relpath, 'MIME_TYPE': mimetype, 'TIME_ORIGIN': time_origin, 'EXTRACTED_FROM': exfrom}) def copyTier(self, eafObj, tierName): """ - Copies the tier to this object
-
- eafObj -- Elan object
+ Copies the tier to this object + + eafObj -- Elan object tierName -- Tier name""" eafObj.removeTier(tierName) try: - t = self.tiers[tierName][3] eafObj.addTier(tierName, tierDict=self.tiers[tierName][3]) for ann in self.getAnnotationDataForTier(tierName): eafObj.insertAnnotation(tierName, ann[0], ann[1], ann[2]) @@ -178,15 +176,15 @@ def copyTier(self, eafObj, tierName): def addTier(self, tierId, ling='default-lt', parent=None, locale=None, part=None, ann=None, tierDict=None): """ - Add a tier to the object
-
- tierId -- Name of the tier
- ling -- Linguistic type
- parent -- ID of parent tier
- locale -- Locale used
- part -- Participant
- ann -- Annotator
- tierDict -- Tier dict to use the quick function, when this is not None
+ Add a tier to the object + + tierId -- Name of the tier + ling -- Linguistic type + parent -- ID of parent tier + locale -- Locale used + part -- Participant + ann -- Annotator + tierDict -- Tier dict to use the quick function, when this is not None it will ignore all other options""" if ling not in self.linguistic_types: warnings.warn( @@ -205,8 +203,8 @@ def addTier(self, tierId, ling='default-lt', parent=None, locale=None, def removeTiers(self, tiers): """ - Remove tiers
-
+ Remove tiers + tiers -- List of names of tiers to remove""" for a in tiers: self.removeTier(a, check=False, clean=False) @@ -214,9 +212,9 @@ def removeTiers(self, tiers): def removeTier(self, idTier, clean=True): """ - Remove tier
-
- idTier -- Name of the tier
+ Remove tier + + idTier -- Name of the tier clean -- Flag to also clean up the timeslot id's(takes time)""" try: del(self.tiers[idTier]) @@ -234,8 +232,8 @@ def getTierNames(self): def getParametersForTier(self, idTier): """ - Gives the tierdict that is usable in the addTier function
-
+ Gives the tierdict that is usable in the addTier function + idTier -- Name of the tier""" try: return self.tiers[idTier][2] @@ -245,8 +243,8 @@ def getParametersForTier(self, idTier): def childTiersFor(self, idTier): """ - Gives all children tiers
-
+ Gives all children tiers + idTier -- Parent tier""" try: return [m for m in self.tiers @@ -258,8 +256,8 @@ def childTiersFor(self, idTier): def getAnnotationDataForTier(self, idTier): """ - Gives a list of annotations in the format (start, end, value)
-
+ Gives a list of annotations in the format (start, end, value) + idTier -- Name of the tier""" try: a = self.tiers[idTier][0] @@ -271,9 +269,9 @@ def getAnnotationDataForTier(self, idTier): def getAnnotationDataAtTime(self, idTier, time): """ - Gives the annotation at time
-
- idTier -- Name of the tier
+ Gives the annotation at time + + idTier -- Name of the tier time -- Time""" try: anns = self.tiers[idTier][0] @@ -288,10 +286,10 @@ def getAnnotationDataAtTime(self, idTier, time): def getAnnotationDatasBetweenTimes(self, idTier, start, end): """ - Gives a list of annotations that occur between times
-
- idTier -- Name of the tier
- start -- Start time
+ Gives a list of annotations that occur between times + + idTier -- Name of the tier + start -- Start time end -- End time""" try: anns = self.tiers[idTier][0] @@ -305,8 +303,8 @@ def getAnnotationDatasBetweenTimes(self, idTier, start, end): def removeAllAnnotationsFromTier(self, idTier): """ - Remove all annotations from a tier
-
+ Remove all annotations from a tier + idTier -- Name of the tier""" try: self.tiers[idTier][0], self.tiers[idTier][1] = {}, {} @@ -318,12 +316,12 @@ def removeAllAnnotationsFromTier(self, idTier): def insertAnnotation(self, idTier, start, end, value='', svg_ref=None): """ - Insert an annotation in a tier
-
- idTier -- Name of the tier
- start -- Start time of the annotation
- end -- End time of the annotation
- value -- Value of the annotation
+ Insert an annotation in a tier + + idTier -- Name of the tier + start -- Start time of the annotation + end -- End time of the annotation + value -- Value of the annotation svg_ref -- SVG reference""" try: startTs = self.generateTsId(start) @@ -337,15 +335,15 @@ def insertAnnotation(self, idTier, start, end, value='', svg_ref=None): def removeAnnotation(self, idTier, time, clean=True): """ - Remove an annotation at time
-
- idTier -- Name of the tier
- time -- Time
+ Remove an annotation at time + + idTier -- Name of the tier + time -- Time clean -- Flag to clean timeslots(this takes time)""" try: - for b in [a for a in self.tiers[tier][0].iteritems() if + for b in [a for a in self.tiers[idTier][0].iteritems() if a[1][0] >= time and a[1][1] <= time]: - del(self.tiers[tier][0][b[0]]) + del(self.tiers[idTier][0][b[0]]) if clean: self.cleanTimeSlots() return 0 @@ -355,12 +353,12 @@ def removeAnnotation(self, idTier, time, clean=True): def insertRefAnnotation(self, idTier, ref, value, prev, svg_ref=None): """ - Insert a ref annotation in a tier
-
- idTier -- Name of the tier
- ref -- Reference
- value -- Value of the annotation
- prev -- Previous annotation
+ Insert a ref annotation in a tier + + idTier -- Name of the tier + ref -- Reference + value -- Value of the annotation + prev -- Previous annotation svg_ref -- SVG reference""" try: self.tiers[idTier][1][self.generateAnnotationId()] =\ @@ -372,8 +370,8 @@ def insertRefAnnotation(self, idTier, ref, value, prev, svg_ref=None): def getRefAnnotationDataForTier(self, idTier): """" - Give a list of all reference annotations
-
+ Give a list of all reference annotations + idTier -- Name of the tier""" try: return self.tiers[idTier][1] @@ -383,8 +381,8 @@ def getRefAnnotationDataForTier(self, idTier): def removeControlledVocabulary(self, cv): """ - Remove a controlled vocabulary
-
+ Remove a controlled vocabulary + cv -- Controlled vocabulary ID""" try: del(self.controlled_vocabularies[cv]) @@ -445,20 +443,20 @@ def cleanTimeSlots(self): def generateAnnotationConcat(self, tiers, start, end): """ - Generate an concatenated annotation from annotations within a timeframe
-
- tiers -- List of tiers
- start -- Start time
+ Generate an concatenated annotation from annotations within a timeframe + + tiers -- List of tiers + start -- Start time end -- End time""" return '_'.join(set(d[2] for t in tiers if t in self.tiers for d in self.getAnnotationDatasBetweenTimes(t, start, end))) def mergeTiers(self, tiers, tiernew=None, gaptresh=1): """ - Merge tiers
-
- tiers -- List of tiers to merge
- tiernew -- Name of the new tier, if None it will be generated
+ Merge tiers + + tiers -- List of tiers to merge + tiernew -- Name of the new tier, if None it will be generated gaptresh -- Treshhold to glue annotations in ms""" if len([t for t in tiers if t not in self.tiers]) > 0: warnings.warn('mergeTiers: One or more tiers non existent!') @@ -495,32 +493,32 @@ def mergeTiers(self, tiers, tiernew=None, gaptresh=1): def shiftAnnotations(self, time): """ - Shift all annotations to the left or right, this creates a new object
-
+ Shift all annotations to the left or right, this creates a new object + time -- Shift width in ms negative for right shift""" e = self.extract( -1*time, self.getFullTimeInterval()[1]) if time < 0 else\ self.extract(0, self.getFullTimeInterval()[1]-time) for tier in e.tiers.itervalues(): for ann in tier[0].itervalues(): - e.timeslots[ann[0]] = e.timeslots[ann[0]]+offset - e.timeslots[ann[1]] = e.timeslots[ann[1]]+offset + e.timeslots[ann[0]] = e.timeslots[ann[0]]+time + e.timeslots[ann[1]] = e.timeslots[ann[1]]+time e.cleanTimeSlots() return e def filterAnnotations(self, tier, tierName=None, filtin=None, filtex=None): """ - Filter annotations in tier
-
- tier -- Tier to filter
- tierName -- Tier to put the filtered annotations in
- filtin -- Include everything in this list
+ Filter annotations in tier + + tier -- Tier to filter + tierName -- Tier to put the filtered annotations in + filtin -- Include everything in this list filtex -- Exclude everything in this list""" if tier not in self.tiers: warnings.warn('filterAnnotations: Tier non existent!' + tier) return 1 if tierName is None: - tierName = '%s_filter' % tier1 + tierName = '%s_filter' % tier self.removeTier(tierName) self.addTier(tierName) for a in [b for b in self.getAnnotationDataForTier(tier) @@ -532,12 +530,12 @@ def filterAnnotations(self, tier, tierName=None, filtin=None, filtex=None): def glueAnnotationsInTier(self, tier, tierName=None, treshhold=85, filtin=None, filtex=None): """ - Glue annotatotions together
-
- tier -- Tier to glue
- tierName -- Name for the output tier
- treshhold -- Maximal gap to glue
- filtin -- Include only this annotations
+ Glue annotatotions together + + tier -- Tier to glue + tierName -- Name for the output tier + treshhold -- Maximal gap to glue + filtin -- Include only this annotations filtex -- Exclude all this annotations""" if tier not in self.tiers: warnings.warn('glueAnnotationsInTier: Tier non existent!') @@ -571,13 +569,14 @@ def getFullTimeInterval(self): return (min(self.timeslots.itervalues()), max(self.timeslots.itervalues())) - def createGapsAndOverlapsTier(self, tier1, tier2, tierNam=None, maxlen=-1): + def createGapsAndOverlapsTier(self, tier1, tier2, tierName=None, + maxlen=-1): """ - Create a tier with the gaps and overlaps
-
- tier1 -- Name of the first tier
- tier2 -- Name of the second tier
- tierNam -- Name of the output tier
+ Create a tier with the gaps and overlaps + + tier1 -- Name of the first tier + tier2 -- Name of the second tier + tierNam -- Name of the output tier maxlen -- Maximum length of the ftos""" if tier1 not in self.tiers or tier2 not in self.tiers: warnings.warn( @@ -595,11 +594,11 @@ def createGapsAndOverlapsTier(self, tier1, tier2, tierNam=None, maxlen=-1): def getGapsAndOverlapsDuration(self, tier1, tier2, maxlen=-1, progressbar=False): """ - Give gaps and overlaps in the format (type, start, end)
-
- tier1 -- Name of the first tier
- tier2 -- Name of the second tier
- maxlen -- Maximum length of the ftos
+ Give gaps and overlaps in the format (type, start, end) + + tier1 -- Name of the first tier + tier2 -- Name of the second tier + maxlen -- Maximum length of the ftos progressbar -- Flag to display the progress""" if tier1 not in self.tiers or tier2 not in self.tiers: warnings.warn( @@ -671,18 +670,18 @@ def getGapsAndOverlapsDuration(self, tier1, tier2, maxlen=-1, def createControlledVocabulary(self, cvEntries, cvId, description=''): """ - Add a controlled vocabulary
-
- cvEntries -- Entries in the controlled vocabulary
- cvId -- Name of the controlled vocabulary
+ Add a controlled vocabulary + + cvEntries -- Entries in the controlled vocabulary + cvId -- Name of the controlled vocabulary description -- Description""" self.controlledvocabularies[cvId] = (description, cvEntries) def getTierIdsForLinguisticType(self, lingType, parent=None): """ - Give a list of all tiers matching a linguistic type
-
- lingType -- The linguistic type
+ Give a list of all tiers matching a linguistic type + + lingType -- The linguistic type parent -- Only match tiers from this parent""" return [t for t in self.tiers if self.tiers[t][2]['LINGUISTIC_TYPE_REF'] == lingType and @@ -690,8 +689,8 @@ def getTierIdsForLinguisticType(self, lingType, parent=None): def removeLinguisticType(self, lingType): """ - Remove a linguistic type
-
+ Remove a linguistic type + lingType -- Name of the linguistic type""" try: del(self.linguistic_types[lingType]) @@ -704,12 +703,12 @@ def removeLinguisticType(self, lingType): def addLinguisticType(self, lingtype, constraints, timealignable=True, graphicreferences=False, extref=None): """ - Add a linguistic type
-
- lingtype -- Name of the linguistic type
- constraints -- Constraint names
- timealignable -- Flag for time alignable
- graphicreferences -- Graphic references
+ Add a linguistic type + + lingtype -- Name of the linguistic type + constraints -- Constraint names + timealignable -- Flag for time alignable + graphicreferences -- Graphic references extref -- External references""" self.linguistic_types[lingtype] = { 'LINGUISTIC_TYPE_ID': lingtype, diff --git a/pympi/Praat.py b/pympi/Praat.py index e4bda31..aa12053 100644 --- a/pympi/Praat.py +++ b/pympi/Praat.py @@ -19,17 +19,17 @@ class TextGrid: """Class to read and write in TextGrid files, -
- note all the times are in seconds
- xmin - maximum x value
- xmax - maximum y value
- tierNum - number of tiers currently present
- tiers - dict of tiers
+ + note all the times are in seconds + xmin - maximum x value + xmax - maximum y value + tierNum - number of tiers currently present + tiers - dict of tiers """ def __init__(self, filePath=None, codec='ascii'): """Constructor, -
- filePath -- Filepath to read from - for stdin
+ + filePath -- Filepath to read from - for stdin codec -- File encoding""" self.tiers = dict() if filePath is None: @@ -67,10 +67,10 @@ def __update(self): def addTier(self, name, tierType='IntervalTier', number=None): """ - Add a tier
-
- name -- Name of the tier
- tierType -- Type of the tier
+ Add a tier + + name -- Name of the tier + tierType -- Type of the tier number -- Position of the tier""" if number is None: number = 1 if len(self.tiers) is 0 else\ @@ -81,20 +81,20 @@ def addTier(self, name, tierType='IntervalTier', number=None): def removeTier(self, name): """ - Remove a tier
-
+ Remove a tier + name -- Name of the tier""" if name in self.tiers: del(self.tiers[name]) return 0 else: - warning.warn('removeTier: tier non existent') + warnings.warn('removeTier: tier non existent') return 1 def getTier(self, name): """ - Give a tier
-
+ Give a tier + name -- Name of the tier""" try: return self.tiers[name] @@ -108,10 +108,10 @@ def getTiers(self): # def getGapsAndOverlapsDuration(self, tier1, tier2): # """ -# Give a list of gaps and overlaps between tiers (type, begin, end)
-#
-# tier1 -- Name of tier 1
-# tie
+# Give a list of gaps and overlaps between tiers (type, begin, end) +# +# tier1 -- Name of tier 1 +# tie # """Gives the gaps and the overlaps between tiers in (type, begin, end) # None if one of the tiers doesn't exist""" # if tier1 not in self.tiers or tier2 not in self.tiers: @@ -184,9 +184,9 @@ def getTiers(self): def tofile(self, filepath, codec='utf-16'): """ - Write the object to a file
-
- filepath -- Path to write to - for stdout
+ Write the object to a file + + filepath -- Path to write to - for stdout codec -- Encoding to write to""" if filepath == "-": f = sys.stdout @@ -238,8 +238,8 @@ def tofile(self, filepath, codec='utf-16'): def toEaf(self, filepath): """ - Write to eaf
-
+ Write to eaf + filepath -- Filepath to write to - for stdout""" try: from pympi.Elan import Eaf @@ -260,21 +260,21 @@ def toEaf(self, filepath): class Tier: """Class to represent a TextGrid tier: IntervalTier or TextTier -
- name - tier name
- intervals - list of intervals (start, [end,] value)
- number - number of the tier
- tierType - TextTier or IntervalTier
- xmin - minimum x value
- xmax - maximum x value
+ + name - tier name + intervals - list of intervals (start, [end,] value) + number - number of the tier + tierType - TextTier or IntervalTier + xmin - minimum x value + xmax - maximum x value """ def __init__(self, name, number, tierType, lines=None): """Constructor -
- name -- Name of the tier
- number -- Number of the tier
- tierType -- Type of the tier
+ + name -- Name of the tier + number -- Number of the tier + tierType -- Type of the tier lines -- Lines to parse the tier information from""" self.name = name self.intervals = list() @@ -315,10 +315,10 @@ def update(self): def addPoint(self, point, value, check=True): """ - Add a point to the tier
-
- point -- Time point
- value -- Value
+ Add a point to the tier + + point -- Time point + value -- Value check -- Flag for overlap checking""" if self.tierType is not 'TextTier': warnings.warn( @@ -334,12 +334,12 @@ def addPoint(self, point, value, check=True): def addInterval(self, begin, end, value, check=True, threshhold=5): """ - Add an interval to the tier
-
- begin -- Start time
- end -- End time
- value -- Value
- check -- Flag for overlap checking
+ Add an interval to the tier + + begin -- Start time + end -- End time + value -- Value + check -- Flag for overlap checking threshhold -- Threshhold for checking overlap""" if self.tierType != 'IntervalTier': warnings.warn('addInterval: Wrong tier type... Tier should be a ' + @@ -358,8 +358,8 @@ def addInterval(self, begin, end, value, check=True, threshhold=5): def removeInterval(self, time): """ - Remove an interval at time
-
+ Remove an interval at time + time -- Time""" for r in [i for i in self.intervals if i[0] <= time and i[1] >= time]: self.intervals.remove(r)