diff --git a/bin/accountCreator.py b/bin/accountCreator.py
index 040ad88..2cd3dd0 100644
--- a/bin/accountCreator.py
+++ b/bin/accountCreator.py
@@ -92,7 +92,12 @@
         logging.error("Approval required, skipping...\n")
         continue
     time.sleep(2)
-    browser.find_elements_by_name('button')[0].click()
+    buttons = browser.find_elements_by_name('button')
+    if len(buttons) != 0:
+        buttons[0].click()
+    else:
+        logging.error("No button found, skipping...")
+        continue
     time.sleep(5)
     if args.verbose:
         logging.info("Registered successfully!\n")
@@ -129,7 +134,12 @@
     response2 = response2.json()
     try:
         # Search for the email verification code
-        confirmationLink = re.findall('https://.*/auth/.*', response2['textBody'])[0]
+        links = re.findall('https://.*/auth/.*', response2['textBody'])
+        if len(links) != 0:
+            confirmationLink = links[0]
+        else:
+            logging.error("No link found, skipping...")
+            continue
         if args.verbose:
             logging.info(confirmationLink)
         # Open the email verification link to verify the email
diff --git a/bin/feeder.py b/bin/feeder.py
index 27e8b23..9c1e246 100644
--- a/bin/feeder.py
+++ b/bin/feeder.py
@@ -151,8 +151,7 @@
     urls = extractor.find_urls(account['note'])
     for url in urls:
         # If the url is not valid, drop it and continue
-        surl = url.split()[0]
-        if not validators.url(surl):
+        if not validators.url(url):
             continue
 
         output = {}
@@ -163,37 +162,37 @@
 
         output['meta'] = {}
         output['meta']['activitypub:account_id'] = account['id']
-        output['meta']['activitypub:url-extracted'] = surl
+        output['meta']['activitypub:url-extracted'] = url
 
         signal.alarm(10)
         try:
-            article = newspaper.Article(surl)
+            article = newspaper.Article(url)
         except TimeoutError:
             if args.verbose:
-                logging.error(f"Timeout reached for {surl}")
+                logging.error(f"Timeout reached for {url}")
             continue
         else:
             signal.alarm(0)
 
         # Caching
-        if r.exists(f"cu:{base64.b64encode(surl.encode())}"):
+        if r.exists(f"cu:{base64.b64encode(url.encode())}"):
             if args.verbose:
-                logging.info(f"URL {surl} already processed")
+                logging.info(f"URL {url} already processed")
             if not args.nocache:
                 continue
         else:
-            r.set(f"cu:{base64.b64encode(surl.encode())}", account['note'])
-            r.expire(f"cu:{base64.b64encode(surl.encode())}", cache_expire)
+            r.set(f"cu:{base64.b64encode(url.encode())}", account['note'])
+            r.expire(f"cu:{base64.b64encode(url.encode())}", cache_expire)
 
         if args.verbose:
-            logging.info(f"Downloading and parsing {surl}")
+            logging.info(f"Downloading and parsing {url}")
 
         try:
             article.download()
             article.parse()
         except ArticleException:
             if args.verbose:
-                logging.error(f"Unable to download/parse {surl}")
+                logging.error(f"Unable to download/parse {url}")
             continue
 
         output['data'] = article.html
@@ -204,7 +203,7 @@
             article.nlp()
         except:
             if args.verbose:
-                logging.error(f"Unable to nlp {surl}")
+                logging.error(f"Unable to nlp {url}")
             nlpFailed = True
 
         obj = json.dumps(output['data'], indent=4, sort_keys=True)
@@ -275,10 +274,9 @@
     urls = extractor.find_urls(status['content'])
     for url in urls:
         # If the url is not valid, drop it and continue
-        surl = url.split()[0]
-        if not validators.url(surl):
+        if not validators.url(url):
            continue
-        
+
         output = {}
         output['source'] = ailurlextract
         output['source-uuid'] = uuid
@@ -287,37 +285,37 @@
 
         output['meta'] = {}
         output['meta']['activitypub:status_id'] = status['id']
-        output['meta']['activitypub:url-extracted'] = surl
+        output['meta']['activitypub:url-extracted'] = url
 
         signal.alarm(10)
         try:
-            article = newspaper.Article(surl)
+            article = newspaper.Article(url)
         except TimeoutError:
             if args.verbose:
-                logging.error(f"Timeout reached for {surl}")
+                logging.error(f"Timeout reached for {url}")
             continue
         else:
             signal.alarm(0)
 
         # Caching
-        if r.exists(f"cu:{base64.b64encode(surl.encode())}"):
+        if r.exists(f"cu:{base64.b64encode(url.encode())}"):
             if args.verbose:
-                logging.info(f"URL {surl} already processed")
+                logging.info(f"URL {url} already processed")
             if not args.nocache:
                 continue
         else:
-            r.set(f"cu:{base64.b64encode(surl.encode())}", status['content'])
-            r.expire(f"cu:{base64.b64encode(surl.encode())}", cache_expire)
+            r.set(f"cu:{base64.b64encode(url.encode())}", status['content'])
+            r.expire(f"cu:{base64.b64encode(url.encode())}", cache_expire)
 
         if args.verbose:
-            logging.info(f"Downloading and parsing {surl}")
+            logging.info(f"Downloading and parsing {url}")
 
         try:
             article.download()
             article.parse()
         except ArticleException:
             if args.verbose:
-                logging.error(f"Unable to download/parse {surl}")
+                logging.error(f"Unable to download/parse {url}")
             continue
 
         output['data'] = article.html
@@ -328,7 +326,7 @@
             article.nlp()
         except:
             if args.verbose:
-                logging.error(f"Unable to nlp {surl}")
+                logging.error(f"Unable to nlp {url}")
             nlpFailed = True
 
         obj = json.dumps(output['data'], indent=4, sort_keys=True)