refactors and reformats towards 0.3.0 release

somnathrakshit · Mar 29, 2024 · 73201e4
1 parent 303f467
commit 73201e4
Show file tree

Hide file tree

Showing 29 changed files with 2,307 additions and 1,950 deletions.
diff --git a/CHANGELOG.bak b/CHANGELOG.bak
diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/geograpy/__init__.py b/geograpy/__init__.py
@@ -1,63 +1,66 @@
-'''
+"""
 main geograpy 3 module
-'''
+"""
+__version__ = "0.3.0"
 from geograpy.extraction import Extractor
-from geograpy.places import PlaceContext
-from geograpy.locator import Locator
 from geograpy.labels import Labels
+from geograpy.locator import Locator
+from geograpy.places import PlaceContext
+
 
-def get_geoPlace_context(url=None, text=None,debug=False):
-    '''
+def get_geoPlace_context(url=None, text=None, debug=False):
+    """
     Get a place context for a given text with information
     about country, region, city and other
     based on NLTK Named Entities having the Geographic(GPE) label.
-    
+
     Args:
         url(String): the url to read text from (if any)
         text(String): the text to analyze
         debug(boolean): if True show debug information
-    
+
     Returns:
-        places: 
+        places:
             PlaceContext: the place context
-    '''    
-    places=get_place_context(url, text, labels=Labels.geo, debug=debug)
+    """
+    places = get_place_context(url, text, labels=Labels.geo, debug=debug)
     return places
-
-def get_place_context(url=None, text=None,labels=Labels.default, debug=False):
-    '''
+
+
+def get_place_context(url=None, text=None, labels=Labels.default, debug=False):
+    """
     Get a place context for a given text with information
     about country, region, city and other
-    based on NLTK Named Entities in the label set Geographic(GPE), 
+    based on NLTK Named Entities in the label set Geographic(GPE),
     Person(PERSON) and Organization(ORGANIZATION).
-    
+
     Args:
         url(String): the url to read text from (if any)
         text(String): the text to analyze
         debug(boolean): if True show debug information
-    
+
     Returns:
-        pc: 
+        pc:
             PlaceContext: the place context
-    '''
-    e = Extractor(url=url, text=text,debug=debug)
+    """
+    e = Extractor(url=url, text=text, debug=debug)
     e.find_entities(labels=labels)
-    places=e.places
+    places = e.places
     pc = PlaceContext(places)
     pc.setAll()
     return pc
 
-def locateCity(location,correctMisspelling=False,debug=False):
-    '''
+
+def locateCity(location, correctMisspelling=False, debug=False):
+    """
     locate the given location string
     Args:
         location(string): the description of the location
     Returns:
         Locator: the location
-    '''
-    e = Extractor(text=location,debug=debug)
+    """
+    e = Extractor(text=location, debug=debug)
     e.split()
-    loc=Locator.getInstance(correctMisspelling=correctMisspelling,debug=debug)
-    city=loc.locateCity(e.places)
+    loc = Locator.getInstance(correctMisspelling=correctMisspelling, debug=debug)
+    city = loc.locateCity(e.places)
     return city
-
diff --git a/geograpy/extraction.py b/geograpy/extraction.py
@@ -1,93 +1,99 @@
-import nltk
 import re
+
+import nltk
 from newspaper import Article
+
 from geograpy.labels import Labels
 
+
 class Extractor(object):
-    '''
+    """
     Extract geo context for text or from url
-    '''
+    """
+
     def __init__(self, text=None, url=None, debug=False):
-        '''
+        """
         Constructor
         Args:
 
             text(string): the text to analyze
             url(string): the url to read the text to analyze from
             debug(boolean): if True show debug information
-        '''
+        """
         if not text and not url:
-            raise Exception('text or url is required')
-        self.debug=debug
+            raise Exception("text or url is required")
+        self.debug = debug
         self.text = text
         self.url = url
         self.places = []
-        nltk_packages = ['maxent_ne_chunker',
-                        'words',
-                        'treebank',
-                        'maxent_treebank_pos_tagger',
-                        'punkt',
-                        'averaged_perceptron_tagger'
-                        ]
+        nltk_packages = [
+            "maxent_ne_chunker",
+            "words",
+            "treebank",
+            "maxent_treebank_pos_tagger",
+            "punkt",
+            "averaged_perceptron_tagger",
+        ]
         for nltk_package in nltk_packages:
             try:
                 import nltk
+
                 nltk.data.find(nltk_package)
             except LookupError:
                 nltk.downloader.download(nltk_package, quiet=True)
-        import nltk  
+        import nltk
 
     def set_text(self):
-        '''
+        """
         Setter for text
-        '''
+        """
         if not self.text and self.url:
             a = Article(self.url)
             a.download()
             a.parse()
             self.text = a.text
-            
-    def split(self,delimiter=r","):
-        '''
+
+    def split(self, delimiter=r","):
+        """
         simpler regular expression splitter with not entity check
-        
+
         hat tip: https://stackoverflow.com/a/1059601/1497139
-        '''
+        """
         self.set_text()
-        self.places=re.split(delimiter,self.text)
-            
+        self.places = re.split(delimiter, self.text)
+
     def find_geoEntities(self):
-        '''
+        """
         Find geographic entities
-        
+
         Returns:
-            list: 
+            list:
                 List of places
-        '''
+        """
         self.find_entities(Labels.geo)
         return self.places
-        
-    def find_entities(self,labels=Labels.default):
-        '''
+
+    def find_entities(self, labels=Labels.default):
+        """
         Find entities with the given labels set self.places and returns it
         Args:
-            labels: 
+            labels:
                 Labels: The labels to filter
         Returns:
-            list: 
+            list:
                 List of places
-        '''
+        """
         self.set_text()
 
         text = nltk.word_tokenize(self.text)
         nes = nltk.ne_chunk(nltk.pos_tag(text))
 
         for ne in nes:
             if type(ne) is nltk.tree.Tree:
-                nelabel=ne.label()
-                if (nelabel in labels):
-                    leaves=ne.leaves()
+                nelabel = ne.label()
+                if nelabel in labels:
+                    leaves = ne.leaves()
                     if self.debug:
                         print(leaves)
-                    self.places.append(u' '.join([i[0] for i in leaves]))
-        return self.places
+                    self.places.append(" ".join([i[0] for i in leaves]))
+        return self.places
diff --git a/geograpy/labels.py b/geograpy/labels.py
@@ -1,12 +1,14 @@
-'''
+"""
 Created on 2020-09-10
 
 @author: wf
-'''
+"""
+
 
 class Labels(object):
-    '''
+    """
     NLTK labels
-    '''
-    default=['GPE','GSP','PERSON','ORGANIZATION']
-    geo=['GPE','GSP']
+    """
+
+    default = ["GPE", "GSP", "PERSON", "ORGANIZATION"]
+    geo = ["GPE", "GSP"]