From 41c17015737304c413a68904a5a1d0824f696662 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 18 Oct 2023 22:58:47 +0800 Subject: [PATCH] Fix pandas-cookbook notebooks --- .../01-reading-from-a-csv.ipynb | 16 +++-------- ...lecting-data-finding-common-complain.ipynb | 20 ++++--------- ...orough-has-the-most-noise-complaints.ipynb | 28 +++++++------------ ...-weekday-bike-most-groupby-aggregate.ipynb | 16 +++-------- ...ing-dataframes-scraping-weather-data.ipynb | 18 ++++++------ ...rations-which-month-was-the-snowiest.ipynb | 17 ++++++----- .../07-cleaning-up-messy-data.ipynb | 18 ++++-------- .../08-how-to-deal-with-timestamps.ipynb | 16 +++-------- 8 files changed, 51 insertions(+), 98 deletions(-) diff --git a/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb b/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb index ce844ee7f47..4a8e63dd094 100644 --- a/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb +++ b/notebooks/tutorials/pandas-cookbook/01-reading-from-a-csv.ipynb @@ -510,7 +510,7 @@ "new_project = sy.Project(\n", " name=\"My pandas project 1\",\n", " description=\"Hi, I would like to plot the Berri 1 column.\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -538,7 +538,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(get_column, guest_domain_client)" + "project.create_code_request(get_column, guest_client)" ] }, { @@ -550,7 +550,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all())==1" ] }, { @@ -821,14 +821,6 @@ "source": [ "node.land()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "32ab7e8e", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -847,7 +839,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb b/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb index 2b75c070ea3..ecb945878a1 100644 --- a/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb +++ b/notebooks/tutorials/pandas-cookbook/02-selecting-data-finding-common-complain.ipynb @@ -209,9 +209,9 @@ " \"X Coordinate (State Plane)\": lambda x: randint(1,1000000),\n", " \"Y Coordinate (State Plane)\": lambda x: randint(1,1000000),\n", " \"Complaint Type\": lambda x: random.choice([\"Illegal Parking\", \"Noise - Street/Sidewalk\", \"'Animal in a Park'\"]),\n", - " \"Descriptor\": lambda x: random.choice([ 'Branch or Limb Has Fallen Down','Branches Damaged','Broken Fence', 'Broken Glass']),\n", - " \"School Number\": lambda x: random.choice([ 'B073', 'B077', 'B079', 'B080-01', 'B087', 'B099', 'B100', 'B102', 'B109', 'B111']),\n", - " \"Bridge Highway Segment\": lambda x: random.choice([ 'Grand Central Pkwy (Exit 1 E-W)',\n", + " \"Descriptor\": lambda x: random.choice(['Branch or Limb Has Fallen Down','Branches Damaged','Broken Fence', 'Broken Glass']),\n", + " \"School Number\": lambda x: random.choice(['B073', 'B077', 'B079', 'B080-01', 'B087', 'B099', 'B100', 'B102', 'B109', 'B111']),\n", + " \"Bridge Highway Segment\": lambda x: random.choice(['Grand Central Pkwy (Exit 1 E-W)',\n", " 'Grand Central Pkwy (Exit 10) - 69th Rd-Jewel Ave (Exit 11)',\n", " 'GrandCentral Pkwy/VanWyck Expwy/College Point Blvd (Exit 22 A-E)',\n", " 'Hamilton Ave (Exit 2A) - Gowanus Expwy (I-278) (Exit 1)',\n", @@ -698,7 +698,7 @@ "new_project = sy.Project(\n", " name=\"Pandas chapter 2\",\n", " description=\"Hi, I would like to plot the histogram of the complaint types.\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -726,7 +726,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(get_counts, guest_domain_client)" + "project.create_code_request(get_counts, guest_client)" ] }, { @@ -738,7 +738,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all()) == 1" ] }, { @@ -974,14 +974,6 @@ "source": [ "node.land()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "247f8f48-7281-4dd5-a6fe-702ed46891cb", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb b/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb index 2f3e20955be..a0ff4c3f34d 100644 --- a/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb +++ b/notebooks/tutorials/pandas-cookbook/03-which-borough-has-the-most-noise-complaints.ipynb @@ -95,9 +95,9 @@ "# Make the graphs a bit prettier, and bigger\n", "plt.style.use('ggplot')\n", "\n", - "# This is necessary to show lots of columns in pandas 0.12. \n", + "# This is necessary to show lots of columns in pandas 0.12.\n", "# Not necessary in pandas 0.13.\n", - "pd.set_option('display.width', 5000) \n", + "pd.set_option('display.width', 5000)\n", "pd.set_option('display.max_columns', 60)\n", "\n", "plt.rcParams['figure.figsize'] = (15, 5)" @@ -222,9 +222,9 @@ " \"X Coordinate (State Plane)\": lambda x: randint(1,1000000),\n", " \"Y Coordinate (State Plane)\": lambda x: randint(1,1000000),\n", " \"Complaint Type\": lambda x: random.choice([\"Illegal Parking\", \"Noise - Street/Sidewalk\", \"'Animal in a Park'\"]),\n", - " \"Descriptor\": lambda x: random.choice([ 'Branch or Limb Has Fallen Down','Branches Damaged','Broken Fence', 'Broken Glass']),\n", - " \"School Number\": lambda x: random.choice([ 'B073', 'B077', 'B079', 'B080-01', 'B087', 'B099', 'B100', 'B102', 'B109', 'B111']),\n", - " \"Bridge Highway Segment\": lambda x: random.choice([ 'Grand Central Pkwy (Exit 1 E-W)',\n", + " \"Descriptor\": lambda x: random.choice(['Branch or Limb Has Fallen Down','Branches Damaged','Broken Fence', 'Broken Glass']),\n", + " \"School Number\": lambda x: random.choice(['B073', 'B077', 'B079', 'B080-01', 'B087', 'B099', 'B100', 'B102', 'B109', 'B111']),\n", + " \"Bridge Highway Segment\": lambda x: random.choice(['Grand Central Pkwy (Exit 1 E-W)',\n", " 'Grand Central Pkwy (Exit 10) - 69th Rd-Jewel Ave (Exit 11)',\n", " 'GrandCentral Pkwy/VanWyck Expwy/College Point Blvd (Exit 22 A-E)',\n", " 'Hamilton Ave (Exit 2A) - Gowanus Expwy (I-278) (Exit 1)',\n", @@ -265,7 +265,7 @@ "mock_data = dict()\n", "for col in complaints.columns:\n", " col_vals = complaints[col]\n", - " \n", + "\n", " if col in fake_functions:\n", " mock_func = fake_functions[col]\n", " elif len(set(complaints[col])) < 100:\n", @@ -810,7 +810,7 @@ "new_project = sy.Project(\n", " name=\"Pandas chapter 3\",\n", " description=\"Hi, I would like to plot the histogram of the noise complaint counts per area.\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -838,7 +838,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(get_counts, guest_domain_client)" + "project.create_code_request(get_counts, guest_client)" ] }, { @@ -850,7 +850,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all())==1" ] }, { @@ -1086,14 +1086,6 @@ "source": [ "node.land()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4374c6ed-b64f-4819-aca4-5570676d6c68", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1112,7 +1104,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb b/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb index ed59b274c64..2f5c7182e68 100644 --- a/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb +++ b/notebooks/tutorials/pandas-cookbook/04-weekday-bike-most-groupby-aggregate.ipynb @@ -588,7 +588,7 @@ "new_project = sy.Project(\n", " name=\"Pandas Chapter 4\",\n", " description=\"Hi, I would like to plot histogram of bikers per weekday\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -616,7 +616,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(get_bike_hist, guest_domain_client)" + "project.create_code_request(get_bike_hist, guest_client)" ] }, { @@ -628,7 +628,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all()) == 1" ] }, { @@ -876,14 +876,6 @@ "source": [ "node.land()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cbcd55e5", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -902,7 +894,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb b/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb index 1fc3212aaa2..519e749a82b 100644 --- a/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb +++ b/notebooks/tutorials/pandas-cookbook/05-combining-dataframes-scraping-weather-data.ipynb @@ -258,11 +258,13 @@ "for month in range(1, 13):\n", " url = url_template.format(month=month, year=2012)\n", " weather = pd.read_csv(url, parse_dates=True, index_col='Date/Time (LST)')\n", - " mock = pd.DataFrame(index=[Timestamp(get_mock_date(i)) for i in range(len(weather))],\n", - " data=[get_mock_row(i) for i in range(len(weather))],\n", - " columns=weather.columns)\n", + " mock = pd.DataFrame(\n", + " index=[Timestamp(get_mock_date(i)) for i in range(len(weather))],\n", + " data=[get_mock_row(i) for i in range(len(weather))],\n", + " columns=weather.columns\n", + " )\n", " \n", - " assets += [sy.Asset(name=f\"weather{month}\", data=weather, mock=mock, mock_is_real=False)]" + " assets.append(sy.Asset(name=f\"weather{month}\", data=weather, mock=mock, mock_is_real=False))" ] }, { @@ -793,7 +795,7 @@ "new_project = sy.Project(\n", " name=\"Pandas Chapter 5\",\n", " description=\"Hi, get all weather data for 2012\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -821,7 +823,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(get_2012_weather, guest_domain_client)" + "project.create_code_request(get_2012_weather, guest_client)" ] }, { @@ -833,7 +835,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all())==1" ] }, { @@ -1175,7 +1177,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb b/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb index 0a639bcb2b2..7c4769191ac 100644 --- a/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb +++ b/notebooks/tutorials/pandas-cookbook/06-string-operations-which-month-was-the-snowiest.ipynb @@ -211,8 +211,7 @@ " res = dict()\n", " for k, function in mock_functions.items():\n", " res[k] = function()\n", - " return res\n", - " " + " return res\n" ] }, { @@ -247,7 +246,7 @@ "mock = pd.DataFrame(index=[Timestamp(get_mock_date(i)) for i in range(len(weather_2012_final))],\n", " data=[get_mock_row(i) for i in range(len(weather_2012_final))],\n", " columns=weather_2012_final.columns)\n", - " \n" + "\n" ] }, { @@ -657,10 +656,10 @@ " weather_2012 = df\n", " weather_description = weather_2012['Weather']\n", " is_snowing = weather_description.str.contains('Snow')\n", - " \n", + "\n", " median_temperature = weather_2012['Temp (C)'].resample('M').apply(np.median)\n", " snow_frequency = is_snowing.astype(float).resample('M').apply(np.mean)\n", - " \n", + "\n", " temperature = weather_2012['Temp (C)'].resample('M').apply(np.median)\n", " is_snowing = weather_2012['Weather'].str.contains('Snow')\n", " snowiness = is_snowing.astype(float).resample('M').apply(np.mean)\n", @@ -670,7 +669,7 @@ " snowiness.name = \"Snowiness\"\n", " stats = pd.concat([temperature, snowiness], axis=1)\n", " stats.columns = [\"temperature\", \"snowiness\"]\n", - " \n", + "\n", " return is_snowing.astype(float), median_temperature, snow_frequency, stats" ] }, @@ -694,7 +693,7 @@ "new_project = sy.Project(\n", " name=\"Pandas Chapter 6\",\n", " description=\"Hi, I would like to get some insights about snow and temperature for 2012\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -722,7 +721,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(snow_and_temperature, guest_domain_client)" + "project.create_code_request(snow_and_temperature, guest_client)" ] }, { @@ -734,7 +733,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all()) == 1" ] }, { diff --git a/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb b/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb index 92490b2ba40..89917c1f4a7 100644 --- a/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb +++ b/notebooks/tutorials/pandas-cookbook/07-cleaning-up-messy-data.ipynb @@ -360,7 +360,7 @@ }, "outputs": [], "source": [ - "ds = guest_domain_client.datasets[0]" + "ds = guest_client.datasets[0]" ] }, { @@ -740,7 +740,7 @@ "new_project = sy.Project(\n", " name=\"Pandas Chapter 7\",\n", " description=\"Hi, I would like to get some insights about the zip codes of the complaints\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -768,7 +768,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(zip_codes, guest_domain_client)" + "project.create_code_request(zip_codes, guest_client)" ] }, { @@ -780,7 +780,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all()) == 1" ] }, { @@ -1064,14 +1064,6 @@ "source": [ "node.land()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb11ce89", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1090,7 +1082,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb b/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb index 60387e53d39..a9945663fef 100644 --- a/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb +++ b/notebooks/tutorials/pandas-cookbook/08-how-to-deal-with-timestamps.ipynb @@ -677,7 +677,7 @@ "new_project = sy.Project(\n", " name=\"Pandas Chapter 8\",\n", " description=\"Hi, I would like to get some insights about the installed programs\",\n", - " members=[guest_domain_client],\n", + " members=[guest_client],\n", ")\n", "new_project" ] @@ -705,7 +705,7 @@ }, "outputs": [], "source": [ - "project.create_code_request(find_recently_installed, guest_domain_client)" + "project.create_code_request(find_recently_installed, guest_client)" ] }, { @@ -717,7 +717,7 @@ }, "outputs": [], "source": [ - "assert len(guest_domain_client.code.get_all())==1" + "assert len(guest_client.code.get_all()) == 1" ] }, { @@ -979,14 +979,6 @@ "source": [ "node.land()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b4dc278-0a7f-4d01-b660-82aed6419fcc", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1005,7 +997,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": 1,