From 8863afb9bee43b58307f08597c98e47984bc934e Mon Sep 17 00:00:00 2001 From: deven367 Date: Fri, 31 May 2024 12:02:10 -0400 Subject: [PATCH] mv download logic in the index nb --- nbs/00_utils.ipynb | 66 +++++----------------------------------------- nbs/index.ipynb | 21 +++++++++++++++ 2 files changed, 27 insertions(+), 60 deletions(-) diff --git a/nbs/00_utils.ipynb b/nbs/00_utils.ipynb index 4583887..e0ff919 100644 --- a/nbs/00_utils.ipynb +++ b/nbs/00_utils.ipynb @@ -377,60 +377,6 @@ " nltk.download(\"omw-1.4\")\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "a59f9b5a", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "from nltk.corpus import stopwords\n", - "import nltk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29c07c57", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading dependencies\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package punkt to /home/deven/nltk_data...\n", - "[nltk_data] Unzipping tokenizers/punkt.zip.\n", - "[nltk_data] Downloading package stopwords to /home/deven/nltk_data...\n", - "[nltk_data] Unzipping corpora/stopwords.zip.\n", - "[nltk_data] Downloading package averaged_perceptron_tagger to\n", - "[nltk_data] /home/deven/nltk_data...\n", - "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n", - "[nltk_data] Downloading package wordnet to /home/deven/nltk_data...\n", - "[nltk_data] Downloading package omw-1.4 to /home/deven/nltk_data...\n" - ] - } - ], - "source": [ - "#| hide\n", - "try:\n", - " nltk.data.find('tokenizers/punkt')\n", - " nltk.data.find('corpora/stopwords')\n", - " nltk.data.find('corpora/wordnet')\n", - " nltk.data.find('corpora/omw-1.4')\n", - " nltk.data.find('taggers/averaged_perceptron_tagger')\n", - "except:\n", - " print('Downloading dependencies')\n", - " download_nltk_dep()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1158,7 +1104,7 @@ " name = str(self)\n", " if name.endswith('.npy'):\n", " return np.load(self).shape\n", - " raise AssertionError('not a npy array') " + " raise AssertionError('not a npy array')" ] }, { @@ -1223,7 +1169,7 @@ "outputs": [], "source": [ "#| local\n", - "with working_directory('/home/deven'): \n", + "with working_directory('/home/deven'):\n", " shp = Path('test.npy').shape\n", " test_eq(arr.shape, Path('test.npy').shape)" ] @@ -1260,7 +1206,7 @@ "def text(self: Path):\n", " if str(self).endswith('.txt'):\n", " with open(self) as f: return f.read()\n", - " raise AssertionError('not a txt file') " + " raise AssertionError('not a txt file')" ] }, { @@ -1305,11 +1251,11 @@ "def sentences(self: Path):\n", " name = str(self)\n", " if name.endswith('.txt'):\n", - " if '_cleaned' in name: \n", + " if '_cleaned' in name:\n", " return split_by_newline(self.text)\n", - " else: \n", + " else:\n", " return make_sentences(self.text)\n", - " raise AssertionError('not a txt file') " + " raise AssertionError('not a txt file')" ] }, { diff --git a/nbs/index.ipynb b/nbs/index.ipynb index d9585ab..27f3786 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -22,6 +22,27 @@ "%autoreload 2" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nltk.corpus import stopwords\n", + "import nltk\n", + "\n", + "try:\n", + " nltk.data.find('tokenizers/punkt')\n", + " nltk.data.find('corpora/stopwords')\n", + " nltk.data.find('corpora/wordnet')\n", + " nltk.data.find('corpora/omw-1.4')\n", + " nltk.data.find('taggers/averaged_perceptron_tagger')\n", + "except:\n", + " print('Downloading dependencies')\n", + " download_nltk_dep()" + ] + }, { "cell_type": "markdown", "metadata": {},