Skip to content

Commit

Permalink
Close out open notebooks, update old NLTK code
Browse files Browse the repository at this point in the history
  • Loading branch information
AaronWChen committed May 8, 2024
1 parent b6c6e22 commit 89fc77a
Show file tree
Hide file tree
Showing 9 changed files with 3,106 additions and 757 deletions.
11 changes: 11 additions & 0 deletions .gitconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Generated by nbdev_install_hooks
#
# If you need to disable this instrumentation do:
# git config --local --unset include.path
#
# To restore:
# git config --local include.path ../.gitconfig
#
[merge "nbdev-merge"]
name = resolve conflicts with nbdev_fix
driver = nbdev_merge %O %A %B %P
8 changes: 7 additions & 1 deletion nbs/01_initial_eda.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3059,7 +3059,13 @@
"source": []
}
],
"metadata": {},
"metadata": {
"kernelspec": {
"display_name": "python3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
5 changes: 5 additions & 0 deletions nbs/04_word_lists_combine_features.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1547,6 +1547,11 @@
"filtered_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
47 changes: 3 additions & 44 deletions nbs/07_bertopic_testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -132,54 +132,13 @@
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"> <span style=\"font-weight: bold\">❗❗❗ AUTHORIZATION REQUIRED ❗❗❗</span> \n",
"</pre>\n"
],
"text/plain": [
" \u001b[1m❗❗❗ AUTHORIZATION REQUIRED ❗❗❗\u001b[0m \n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "084490be36a047529096776bcb49e6de",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\n",
"Open the following link in your browser to authorize the client:\n",
"https://dagshub.com/login/oauth/authorize?state=1bf46b97-13cb-4873-9c57-af8141828675&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=b534b316020d1f4e499a4a24567fc8ff36a49714fc1a4e01f004354ef9e20587\n",
"\n",
"\n"
"Token Dagshub OAuth token, valid until 2023-03-15 05:42:01.541960+00:00 does not exist in the storage\n",
"Token Dagshub OAuth token, valid until 2023-07-27 17:31:38.987842+00:00 does not exist in the storage\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
Expand Down
151 changes: 8 additions & 143 deletions nbs/11_sklearn_mlfow_model_testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -242,149 +242,14 @@
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5c7f9ca7c04d4d61a1d93d784e67a1e8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-02-13 11:12:24 INFO: Downloading default packages for language: en (English) ...\n",
"2024-02-13 11:12:24 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n",
"2024-02-13 11:12:27 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n",
"2024-02-13 11:12:27 INFO: Checking for updates to resources.json in case models have been updated. Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "341eed1075774bfaa7c7873d53723222",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-02-13 11:12:28 INFO: Loading these models for language: en (English):\n",
"======================================\n",
"| Processor | Package |\n",
"--------------------------------------\n",
"| tokenize | combined |\n",
"| pos | combined_charlm |\n",
"| lemma | combined_nocharlm |\n",
"| constituency | ptb3-revised_charlm |\n",
"| depparse | combined_charlm |\n",
"| sentiment | sstplus |\n",
"| ner | ontonotes_charlm |\n",
"======================================\n",
"\n",
"2024-02-13 11:12:28 INFO: Using device: cpu\n",
"2024-02-13 11:12:28 INFO: Loading: tokenize\n",
"2024-02-13 11:12:28 INFO: Loading: pos\n",
"2024-02-13 11:12:28 INFO: Loading: lemma\n",
"2024-02-13 11:12:29 INFO: Loading: constituency\n",
"2024-02-13 11:12:29 INFO: Loading: depparse\n",
"2024-02-13 11:12:29 INFO: Loading: sentiment\n",
"2024-02-13 11:12:29 INFO: Loading: ner\n",
"2024-02-13 11:12:29 INFO: Done loading processors!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"--------------\n",
"Raw Dataframe:\n",
" id \\\n",
"0 54a2b6b019925f464b373351 \n",
"1 54a408a019925f464b3733bc \n",
"2 54a408a26529d92b2c003631 \n",
"3 54a408a66529d92b2c003638 \n",
"4 54a408a719925f464b3733cc \n",
"\n",
" dek \\\n",
"0 How does fried chicken achieve No. 1 status? B... \n",
"1 Spinaci all'Ebraica \n",
"2 This majestic, moist, and richly spiced honey ... \n",
"3 The idea for this sandwich came to me when my ... \n",
"4 In 1930, Simon Agranat, the chief justice of t... \n",
"\n",
" hed pubDate \\\n",
"0 Pickle-Brined Fried Chicken 2014-08-19T04:00:00.000Z \n",
"1 Spinach Jewish Style 2008-09-09T04:00:00.000Z \n",
"2 New Year’s Honey Cake 2008-09-10T04:00:00.000Z \n",
"3 The B.L.A.—Bagel with Lox and Avocado 2008-09-08T04:00:00.000Z \n",
"4 Shakshuka a la Doktor Shakshuka 2008-09-09T04:00:00.000Z \n",
"\n",
" author type \\\n",
"0 [] recipe \n",
"1 [{'name': 'Edda Servi Machlin'}] recipe \n",
"2 [{'name': 'Marcy Goldman'}] recipe \n",
"3 [{'name': 'Faye Levy'}] recipe \n",
"4 [{'name': 'Joan Nathan'}] recipe \n",
"\n",
" url \\\n",
"0 /recipes/food/views/pickle-brined-fried-chicke... \n",
"1 /recipes/food/views/spinach-jewish-style-350152 \n",
"2 /recipes/food/views/majestic-and-moist-new-yea... \n",
"3 /recipes/food/views/the-b-l-a-bagel-with-lox-a... \n",
"4 /recipes/food/views/shakshuka-a-la-doktor-shak... \n",
"\n",
" photoData \\\n",
"0 {'id': '54a2b64a6529d92b2c003409', 'filename':... \n",
"1 {'id': '56746182accb4c9831e45e0a', 'filename':... \n",
"2 {'id': '55e85ba4cf90d6663f728014', 'filename':... \n",
"3 {'id': '5674617e47d1a28026045e4f', 'filename':... \n",
"4 {'id': '56746183b47c050a284a4e15', 'filename':... \n",
"\n",
" tag aggregateRating \\\n",
"0 {'category': 'ingredient', 'name': 'Chicken', ... 3.11 \n",
"1 {'category': 'cuisine', 'name': 'Italian', 'ur... 3.22 \n",
"2 {'category': 'cuisine', 'name': 'Jewish', 'url... 3.62 \n",
"3 {'category': 'cuisine', 'name': 'Jewish', 'url... 4.00 \n",
"4 {'category': 'cuisine', 'name': 'Jewish', 'url... 2.71 \n",
"\n",
" ingredients \\\n",
"0 [1 tablespoons yellow mustard seeds, 1 tablesp... \n",
"1 [3 pounds small-leaved bulk spinach, Salt, 1/2... \n",
"2 [3 1/2 cups all-purpose flour, 1 tablespoon ba... \n",
"3 [1 small ripe avocado, preferably Hass (see No... \n",
"4 [2 pounds fresh tomatoes, unpeeled and cut in ... \n",
"\n",
" prepSteps reviewsCount \\\n",
"0 [Toast mustard and coriander seeds in a dry me... 7 \n",
"1 [Remove the stems and roots from the spinach. ... 5 \n",
"2 [I like this cake best baked in a 9-inch angel... 105 \n",
"3 [A short time before serving, mash avocado and... 7 \n",
"4 [1. Place the tomatoes, garlic, salt, paprika,... 7 \n",
"\n",
" willMakeAgainPct dateCrawled \n",
"0 100 1498547035 \n",
"1 80 1498547740 \n",
"2 88 1498547738 \n",
"3 100 1498547740 \n",
"4 83 1498547740 \n",
"(34756, 15)\n"
"ename": "NameError",
"evalue": "name 'stanza' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# instantiate stanza pipeline\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mstanza\u001b[49m\u001b[38;5;241m.\u001b[39mdownload(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124men\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 3\u001b[0m nlp \u001b[38;5;241m=\u001b[39m stanza\u001b[38;5;241m.\u001b[39mPipeline(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124men\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 4\u001b[0m depparse_batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m, \n\u001b[1;32m 5\u001b[0m depparse_min_length_to_batch_separately\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 8\u001b[0m batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m\n\u001b[1;32m 9\u001b[0m )\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# load raw data and preprocess/clean\u001b[39;00m\n",
"\u001b[0;31mNameError\u001b[0m: name 'stanza' is not defined"
]
}
],
Expand Down
Loading

0 comments on commit 89fc77a

Please sign in to comment.