Skip to content

Commit

Permalink
SSL related issues
Browse files Browse the repository at this point in the history
  • Loading branch information
niekveldhuis committed Feb 10, 2024
1 parent 57e7ce3 commit c6a57fd
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 90 deletions.
106 changes: 16 additions & 90 deletions 2_1_Data_Acquisition_ORACC/2_1_0_download_ORACC-JSON.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,17 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from tqdm.auto import tqdm\n",
"import os\n",
"import ipywidgets as widgets"
"import ipywidgets as widgets\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\", message=\n",
" \"Unverified HTTPS request is being made to host\")"
]
},
{
Expand All @@ -67,7 +70,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -85,24 +88,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4444d379ecb3488691395c05a8133387",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Textarea(value='', description='Projects:', placeholder='Type project names, separated by commas')"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"projects = widgets.Textarea(\n",
" placeholder='Type project names, separated by commas',\n",
Expand All @@ -122,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -143,28 +131,20 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {
"tags": [
"remove_output"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING: http://oracc.museum.upenn.edu/json/.zip does not exist.\n"
]
}
],
"outputs": [],
"source": [
"CHUNK = 1024\n",
"for project in project_list:\n",
" proj = project.replace('/', '-')\n",
" url = f\"http://oracc.museum.upenn.edu/json/{proj}.zip\"\n",
" file = f'jsonzip/{proj}.zip'\n",
" with requests.get(url, stream=True) as request:\n",
" with requests.get(url, stream=True, verify=False) as request:\n",
" if request.status_code == 200: # meaning that the file exists\n",
" total_size = int(request.headers.get('content-length', 0))\n",
" tqdm.write(f'Saving {url} as {file}')\n",
Expand All @@ -181,7 +161,6 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"(2.1.0.5)=\n",
"## 2.1.0.5 Downloading with the utils Module\n",
"In Chapters 3-6, [ORACC](http://oracc.org) data are downloaded with the `oracc_download()` function from the `utils` module, which can be found in the `utils` directory. The following code illustrates how to use that function. \n",
"\n",
Expand All @@ -196,66 +175,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {
"tags": [
"remove_output"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saving http://build-oracc.museum.upenn.edu/json/dcclt.zip as jsonzip/dcclt.zip.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bf797de366124d65822fd4b384d6102a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"dcclt: 0%| | 0.00/71.6M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saving http://build-oracc.museum.upenn.edu/json/saao-saa01.zip as jsonzip/saao-saa01.zip.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b7871ee1fa1b4e779ca326bb296f0edc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"saao/saa01: 0%| | 0.00/4.99M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"['dcclt', 'saao/saa01']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import os\n",
"import sys\n",
Expand Down Expand Up @@ -285,7 +211,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -299,7 +225,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.9.7"
},
"toc-autonumbering": false
},
Expand Down
1 change: 1 addition & 0 deletions utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import requests
from tqdm.auto import tqdm
import os
import sys
import zipfile
import json
import pandas as pd
Expand Down

0 comments on commit c6a57fd

Please sign in to comment.