From c6a57fd3114f695590eabd8553838d3cfabb6c5a Mon Sep 17 00:00:00 2001 From: niekveldhuis Date: Fri, 9 Feb 2024 16:32:25 -0800 Subject: [PATCH] SSL related issues --- .../2_1_0_download_ORACC-JSON.ipynb | 106 +++--------------- utils/utils.py | 1 + 2 files changed, 17 insertions(+), 90 deletions(-) diff --git a/2_1_Data_Acquisition_ORACC/2_1_0_download_ORACC-JSON.ipynb b/2_1_Data_Acquisition_ORACC/2_1_0_download_ORACC-JSON.ipynb index e905aee..a05abfb 100644 --- a/2_1_Data_Acquisition_ORACC/2_1_0_download_ORACC-JSON.ipynb +++ b/2_1_Data_Acquisition_ORACC/2_1_0_download_ORACC-JSON.ipynb @@ -47,14 +47,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import requests\n", "from tqdm.auto import tqdm\n", "import os\n", - "import ipywidgets as widgets" + "import ipywidgets as widgets\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", message=\n", + " \"Unverified HTTPS request is being made to host\")" ] }, { @@ -67,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -85,24 +88,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4444d379ecb3488691395c05a8133387", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Textarea(value='', description='Projects:', placeholder='Type project names, separated by commas')" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "projects = widgets.Textarea(\n", " placeholder='Type project names, separated by commas',\n", @@ -122,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -143,28 +131,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "tags": [ "remove_output" ] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING: http://oracc.museum.upenn.edu/json/.zip does not exist.\n" - ] - } - ], + "outputs": [], "source": [ "CHUNK = 1024\n", "for project in project_list:\n", " proj = project.replace('/', '-')\n", " url = f\"http://oracc.museum.upenn.edu/json/{proj}.zip\"\n", " file = f'jsonzip/{proj}.zip'\n", - " with requests.get(url, stream=True) as request:\n", + " with requests.get(url, stream=True, verify=False) as request:\n", " if request.status_code == 200: # meaning that the file exists\n", " total_size = int(request.headers.get('content-length', 0))\n", " tqdm.write(f'Saving {url} as {file}')\n", @@ -181,7 +161,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(2.1.0.5)=\n", "## 2.1.0.5 Downloading with the utils Module\n", "In the chapters 3-6, downloading of [ORACC](http://oracc.org) data will be done with the `oracc_download()` function in the module `utils` that can be found in the `utils` directory. The following code illustrates how to use that function. \n", "\n", @@ -196,66 +175,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [ "remove_output" ] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Saving http://build-oracc.museum.upenn.edu/json/dcclt.zip as jsonzip/dcclt.zip.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bf797de366124d65822fd4b384d6102a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "dcclt: 0%| | 0.00/71.6M [00:00