From 3ccc6720e722e2178b21ac358daa55635ecdaed2 Mon Sep 17 00:00:00 2001 From: Adam Sawicki Date: Wed, 8 Jan 2020 01:13:59 -0500 Subject: [PATCH 1/2] Add '--no-logs' option to argument parser --- rnlp/__main__.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/rnlp/__main__.py b/rnlp/__main__.py index 085c63e..208beaa 100644 --- a/rnlp/__main__.py +++ b/rnlp/__main__.py @@ -38,15 +38,6 @@ LOGGER = logging.getLogger(__name__) LOGGER.setLevel(logging.INFO) -LOG_HANDLER = logging.FileHandler("rnlp_log.log") -LOG_HANDLER.setLevel(logging.INFO) -FORMATTER = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") -LOG_HANDLER.setFormatter(FORMATTER) - -LOGGER.addHandler(LOG_HANDLER) -LOGGER.info("Started logger.") - -LOGGER.info("Started Argument Parser.") PARSER = argparse.ArgumentParser( description="rnlp (v{0}): Convert text into relational facts.".format(__version__), epilog="This program is free software under the {0}. {1}".format( @@ -61,8 +52,19 @@ "-d", "--directory", type=str, help="Read all .txt files in directory" ) FILE_OR_DIR.add_argument("-f", "--file", type=str, help="Read from one .txt file") +PARSER.add_argument("--no-logs", action="store_true", help="Specify that no logs should be created.") + ARGS = PARSER.parse_args() +LOG_HANDLER = logging.NullHandler() if ARGS.no_logs else logging.FileHandler("rnlp_log.log") + +LOG_HANDLER.setLevel(logging.INFO) +FORMATTER = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +LOG_HANDLER.setFormatter(FORMATTER) + +LOGGER.addHandler(LOG_HANDLER) +LOGGER.info("Started logger.") + LOGGER.info("Argument Parsing Successful.") N_BLOCKS = ARGS.blockSize From f3292d529f692b059918a1d0042888d06e95e7e7 Mon Sep 17 00:00:00 2001 From: Adam Sawicki Date: Fri, 24 Jan 2020 12:23:47 -0500 Subject: [PATCH 2/2] Add check_state module to ensure proper nltk setup --- rnlp/check_state.py | 54 ++++++++++++++++++++++++++++++++++++++++++ rnlp/parse.py | 2 ++ 
rnlp/textprocessing.py | 3 +++ 3 files changed, 59 insertions(+) create mode 100644 rnlp/check_state.py diff --git a/rnlp/check_state.py b/rnlp/check_state.py new file mode 100644 index 0000000..9414723 --- /dev/null +++ b/rnlp/check_state.py @@ -0,0 +1,54 @@ +_err = ( + """ + >>> import nltk + >>> nltk.download('punkt') + >>> nltk.download('stopwords') + >>> nltk.download('averaged_perceptron_tagger') + + Visit https://rnlp.readthedocs.io/en/latest/getting_started/02_installation.html + for more information. + """ +) + + +def _ensure_nltk_installed(): + """ + Determine if `nltk` is installed. + + :raises: Exception if `nltk` is not installed. + """ + try: + import nltk + except ModuleNotFoundError: + raise Exception( + "Unable to `import nltk` because it is not in the current environment." + " Run `pip install nltk`, and then the following in an interpreter:\n" + + _err + ) from None + + +def _find_nltk_module(module_name): + """ + Determine whether a certain `nltk` module is installed. + + :param module_name: Name of a module within nltk + :type module_name: str. + + :raises: Exception if the module cannot be found + """ + import nltk + try: + nltk.data.find(module_name) + except LookupError as e: + raise Exception( + "Unable to find module '" + module_name + "'. 
Please run the following:\n" + + _err + ) from None + + +def ensure_nltk_setup(): + """Function to ensure required packages are installed.""" + _ensure_nltk_installed() + nltk_modules = ("tokenizers/punkt", "corpora/stopwords", "taggers/averaged_perceptron_tagger") + for nltk_module in nltk_modules: + _find_nltk_module(nltk_module) diff --git a/rnlp/parse.py b/rnlp/parse.py index 66ca937..8bee26a 100644 --- a/rnlp/parse.py +++ b/rnlp/parse.py @@ -22,6 +22,8 @@ rnlp.parse ---------- """ +import rnlp.check_state as check_state +check_state.ensure_nltk_setup() import string import nltk diff --git a/rnlp/textprocessing.py b/rnlp/textprocessing.py index 5fa741a..d68ed89 100644 --- a/rnlp/textprocessing.py +++ b/rnlp/textprocessing.py @@ -37,6 +37,9 @@ The depth of reasoning probably depends on the domain you are working on. """ +import rnlp.check_state as check_state +check_state.ensure_nltk_setup() + import string from nltk import sent_tokenize from nltk.corpus import stopwords