
Address issue #8
FBroy committed Aug 18, 2021
1 parent 3c422b3 commit 67485c1
Showing 2 changed files with 35 additions and 27 deletions.
14 changes: 12 additions & 2 deletions bin/accountCreator.py
@@ -92,7 +92,12 @@
             logging.error("Approval required, skipping...\n")
             continue
         time.sleep(2)
-        browser.find_elements_by_name('button')[0].click()
+        buttons = browser.find_elements_by_name('button')
+        if len(buttons) != 0:
+            buttons[0].click()
+        else:
+            logging.error("No button found, skipping...")
+            continue
         time.sleep(5)
         if args.verbose:
             logging.info("Registered successfully!\n")
@@ -129,7 +134,12 @@
         response2 = response2.json()
         try:
             # Search for the email verification code
-            confirmationLink = re.findall('https://.*/auth/.*', response2['textBody'])[0]
+            links = re.findall('https://.*/auth/.*', response2['textBody'])
+            if len(links) != 0:
+                confirmationLink = links[0]
+            else:
+                logging.error("No link found, skipping...")
+                continue
             if args.verbose:
                 logging.info(confirmationLink)
             # Open the email verification link to verify the email
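re.findall returns a plain (possibly empty) list, so the old [0] index could raise an IndexError whenever the verification mail contained no matching link; the new code checks the list before indexing. A small sketch of the same guard in isolation, assuming the message body is already a string; extract_confirmation_link is a hypothetical name:

import re

# Hypothetical helper, not part of the commit: return the first verification
# link or None so the caller can log and skip, as the commit does with `continue`.
def extract_confirmation_link(text_body):
    links = re.findall('https://.*/auth/.*', text_body)
    if not links:
        return None
    return links[0]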
48 changes: 23 additions & 25 deletions bin/feeder.py
@@ -151,8 +151,7 @@
         urls = extractor.find_urls(account['note'])
         for url in urls:
             # If the url is not valid, drop it and continue
-            surl = url.split()[0]
-            if not validators.url(surl):
+            if not validators.url(url):
                 continue
 
             output = {}
@@ -163,37 +162,37 @@
             output['meta'] = {}
             output['meta']['activitypub:account_id'] = account['id']
 
-            output['meta']['activitypub:url-extracted'] = surl
+            output['meta']['activitypub:url-extracted'] = url
 
             signal.alarm(10)
             try:
-                article = newspaper.Article(surl)
+                article = newspaper.Article(url)
             except TimeoutError:
                 if args.verbose:
-                    logging.error(f"Timeout reached for {surl}")
+                    logging.error(f"Timeout reached for {url}")
                 continue
             else:
                 signal.alarm(0)
 
             # Caching
-            if r.exists(f"cu:{base64.b64encode(surl.encode())}"):
+            if r.exists(f"cu:{base64.b64encode(url.encode())}"):
                 if args.verbose:
-                    logging.info(f"URL {surl} already processed")
+                    logging.info(f"URL {url} already processed")
                 if not args.nocache:
                     continue
             else:
-                r.set(f"cu:{base64.b64encode(surl.encode())}", account['note'])
-                r.expire(f"cu:{base64.b64encode(surl.encode())}", cache_expire)
+                r.set(f"cu:{base64.b64encode(url.encode())}", account['note'])
+                r.expire(f"cu:{base64.b64encode(url.encode())}", cache_expire)
 
             if args.verbose:
-                logging.info(f"Downloading and parsing {surl}")
+                logging.info(f"Downloading and parsing {url}")
 
             try:
                 article.download()
                 article.parse()
             except ArticleException:
                 if args.verbose:
-                    logging.error(f"Unable to download/parse {surl}")
+                    logging.error(f"Unable to download/parse {url}")
                 continue
 
             output['data'] = article.html
@@ -204,7 +203,7 @@
                 article.nlp()
             except:
                 if args.verbose:
-                    logging.error(f"Unable to nlp {surl}")
+                    logging.error(f"Unable to nlp {url}")
                 nlpFailed = True
 
             obj = json.dumps(output['data'], indent=4, sort_keys=True)
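The feeder.py change drops the intermediate surl = url.split()[0]: urlextract's find_urls already yields individual URL strings, so splitting on whitespace is redundant and each candidate can be validated and used directly. A minimal, standalone sketch of that extract-then-validate flow, assuming the urlextract and validators packages that feeder.py already uses (the sample note text is made up):

from urlextract import URLExtract
import validators

extractor = URLExtract()
note = "See https://example.com/post and also not-a-url"
for url in extractor.find_urls(note):
    # Drop invalid candidates and continue, as feeder.py does
    if not validators.url(url):
        continue
    print(f"Would process {url}")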
@@ -275,10 +274,9 @@
         urls = extractor.find_urls(status['content'])
         for url in urls:
             # If the url is not valid, drop it and continue
-            surl = url.split()[0]
-            if not validators.url(surl):
+            if not validators.url(url):
                 continue
 
             output = {}
             output['source'] = ailurlextract
             output['source-uuid'] = uuid
@@ -287,37 +285,37 @@
             output['meta'] = {}
             output['meta']['activitypub:status_id'] = status['id']
 
-            output['meta']['activitypub:url-extracted'] = surl
+            output['meta']['activitypub:url-extracted'] = url
 
             signal.alarm(10)
             try:
-                article = newspaper.Article(surl)
+                article = newspaper.Article(url)
             except TimeoutError:
                 if args.verbose:
-                    logging.error(f"Timeout reached for {surl}")
+                    logging.error(f"Timeout reached for {url}")
                 continue
             else:
                 signal.alarm(0)
 
             # Caching
-            if r.exists(f"cu:{base64.b64encode(surl.encode())}"):
+            if r.exists(f"cu:{base64.b64encode(url.encode())}"):
                 if args.verbose:
-                    logging.info(f"URL {surl} already processed")
+                    logging.info(f"URL {url} already processed")
                 if not args.nocache:
                     continue
             else:
-                r.set(f"cu:{base64.b64encode(surl.encode())}", status['content'])
-                r.expire(f"cu:{base64.b64encode(surl.encode())}", cache_expire)
+                r.set(f"cu:{base64.b64encode(url.encode())}", status['content'])
+                r.expire(f"cu:{base64.b64encode(url.encode())}", cache_expire)
 
             if args.verbose:
-                logging.info(f"Downloading and parsing {surl}")
+                logging.info(f"Downloading and parsing {url}")
 
             try:
                 article.download()
                 article.parse()
             except ArticleException:
                 if args.verbose:
-                    logging.error(f"Unable to download/parse {surl}")
+                    logging.error(f"Unable to download/parse {url}")
                 continue
 
             output['data'] = article.html
@@ -328,7 +326,7 @@
                 article.nlp()
             except:
                 if args.verbose:
-                    logging.error(f"Unable to nlp {surl}")
+                    logging.error(f"Unable to nlp {url}")
                 nlpFailed = True
 
             obj = json.dumps(output['data'], indent=4, sort_keys=True)
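For context on the unchanged caching lines above: each validated URL is keyed in Redis as "cu:" plus its base64-encoded form and given an expiry, so a URL seen within the cache window is skipped unless --nocache is set. A hedged sketch of that pattern, with the Redis connection details, expiry value and sample URL as assumptions for illustration:

import base64
import redis

r = redis.Redis(host='localhost', port=6379)   # assumed connection settings
cache_expire = 86400                           # assumed expiry in seconds

url = "https://example.com/article"
key = f"cu:{base64.b64encode(url.encode())}"   # same key format as feeder.py

if r.exists(key):
    print(f"URL {url} already processed")      # feeder.py logs this and may skip
else:
    r.set(key, "note or status content")       # feeder.py stores the source text
    r.expire(key, cache_expire)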
