Skip to content

Commit

Permalink
Merge pull request #9213 from madhavajay/madhava/fix_bigquery_l2
Browse files Browse the repository at this point in the history
Fixing bigquery scenario notebooks
  • Loading branch information
koenvanderveen authored Aug 27, 2024
2 parents 16edc38 + 429143f commit 1f087e0
Show file tree
Hide file tree
Showing 14 changed files with 175 additions and 162 deletions.
4 changes: 2 additions & 2 deletions notebooks/api/0.8/03-data-scientist-download-result.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@
},
"outputs": [],
"source": [
"assert not ops.is_valid\n",
"assert ops.count().unwrap() > 0"
"assert not ops.is_valid()\n",
"assert ops.count() > 0"
]
},
{
Expand Down
14 changes: 1 addition & 13 deletions notebooks/scenarios/bigquery/01-setup-datasite.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,6 @@
"# !docker image ls | grep bigquery"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -273,11 +266,6 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
Expand All @@ -288,7 +276,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.5"
}
},
"nbformat": 4,
Expand Down
91 changes: 49 additions & 42 deletions notebooks/scenarios/bigquery/02-configure-api.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
" from google.oauth2 import service_account\n",
"\n",
" # syft absolute\n",
" from syft.service.response import SyftError\n",
" from syft import SyftException\n",
"\n",
" # Auth for Bigquer based on the workload identity\n",
" credentials = service_account.Credentials.from_service_account_info(\n",
Expand All @@ -118,8 +118,8 @@
" )\n",
"\n",
" if rows.total_rows > 1_000_000:\n",
" return SyftError(\n",
" message=\"Please only write queries that gather aggregate statistics\"\n",
" raise SyftException(\n",
" public_message=\"Please only write queries that gather aggregate statistics\"\n",
" )\n",
"\n",
" return rows.to_dataframe()\n",
Expand All @@ -129,10 +129,9 @@
" # not a bigquery exception\n",
" if not hasattr(e, \"_errors\"):\n",
" output = f\"got exception e: {type(e)} {str(e)}\"\n",
" return SyftError(\n",
" message=f\"An error occured executing the API call {output}\"\n",
" raise SyftException(\n",
" public_message=f\"An error occured executing the API call {output}\"\n",
" )\n",
" # return SyftError(message=\"An error occured executing the API call, please contact the domain owner.\")\n",
"\n",
" if e._errors[0][\"reason\"] in [\n",
" \"badRequest\",\n",
Expand All @@ -150,12 +149,13 @@
" \"tableUnavailable\",\n",
" \"timeout\",\n",
" ]:\n",
" return SyftError(\n",
" message=\"Error occured during the call: \" + e._errors[0][\"message\"]\n",
" raise SyftException(\n",
" public_message=\"Error occured during the call: \"\n",
" + e._errors[0][\"message\"]\n",
" )\n",
" else:\n",
" return SyftError(\n",
" message=\"An error occured executing the API call, please contact the domain owner.\"\n",
" raise SyftException(\n",
" public_message=\"An error occured executing the API call, please contact the domain owner.\"\n",
" )"
]
},
Expand Down Expand Up @@ -210,7 +210,7 @@
" from google.oauth2 import service_account\n",
"\n",
" # syft absolute\n",
" from syft.service.response import SyftError\n",
" from syft import SyftException\n",
"\n",
" # Auth for Bigquer based on the workload identity\n",
" credentials = service_account.Credentials.from_service_account_info(\n",
Expand All @@ -230,7 +230,9 @@
" context.state[context.user.email] = []\n",
"\n",
" if not context.code.is_within_rate_limit(context):\n",
" return SyftError(message=\"Rate limit of calls per minute has been reached.\")\n",
" raise SyftException(\n",
" public_message=\"Rate limit of calls per minute has been reached.\"\n",
" )\n",
"\n",
" try:\n",
" context.state[context.user.email].append(datetime.datetime.now())\n",
Expand All @@ -241,8 +243,8 @@
" )\n",
"\n",
" if rows.total_rows > 1_000_000:\n",
" return SyftError(\n",
" message=\"Please only write queries that gather aggregate statistics\"\n",
" raise SyftException(\n",
" public_message=\"Please only write queries that gather aggregate statistics\"\n",
" )\n",
"\n",
" return rows.to_dataframe()\n",
Expand All @@ -251,10 +253,9 @@
" # not a bigquery exception\n",
" if not hasattr(e, \"_errors\"):\n",
" output = f\"got exception e: {type(e)} {str(e)}\"\n",
" return SyftError(\n",
" message=f\"An error occured executing the API call {output}\"\n",
" raise SyftException(\n",
" public_message=f\"An error occured executing the API call {output}\"\n",
" )\n",
" # return SyftError(message=\"An error occured executing the API call, please contact the domain owner.\")\n",
"\n",
" # Treat all errors that we would like to be forwarded to the data scientists\n",
" # By default, any exception is only visible to the data owner.\n",
Expand All @@ -275,12 +276,13 @@
" \"tableUnavailable\",\n",
" \"timeout\",\n",
" ]:\n",
" return SyftError(\n",
" message=\"Error occured during the call: \" + e._errors[0][\"message\"]\n",
" raise SyftException(\n",
" public_message=\"Error occured during the call: \"\n",
" + e._errors[0][\"message\"]\n",
" )\n",
" else:\n",
" return SyftError(\n",
" message=\"An error occured executing the API call, please contact the domain owner.\"\n",
" raise SyftException(\n",
" public_message=\"An error occured executing the API call, please contact the domain owner.\"\n",
" )"
]
},
Expand Down Expand Up @@ -401,7 +403,7 @@
" import pandas as pd\n",
"\n",
" # syft absolute\n",
" from syft.service.response import SyftError\n",
" from syft import SyftException\n",
"\n",
" # Auth for Bigquer based on the workload identity\n",
" credentials = service_account.Credentials.from_service_account_info(\n",
Expand All @@ -420,7 +422,9 @@
" context.state[context.user.email] = []\n",
"\n",
" if not context.code.is_within_rate_limit(context):\n",
" return SyftError(message=\"Rate limit of calls per minute has been reached.\")\n",
" raise SyftException(\n",
" public_message=\"Rate limit of calls per minute has been reached.\"\n",
" )\n",
"\n",
" try:\n",
" context.state[context.user.email].append(datetime.datetime.now())\n",
Expand Down Expand Up @@ -452,14 +456,13 @@
" # not a bigquery exception\n",
" if not hasattr(e, \"_errors\"):\n",
" output = f\"got exception e: {type(e)} {str(e)}\"\n",
" return SyftError(\n",
" message=f\"An error occured executing the API call {output}\"\n",
" raise SyftException(\n",
" public_message=f\"An error occured executing the API call {output}\"\n",
" )\n",
" # return SyftError(message=\"An error occured executing the API call, please contact the domain owner.\")\n",
"\n",
" # Should add appropriate error handling for what should be exposed to the data scientists.\n",
" return SyftError(\n",
" message=\"An error occured executing the API call, please contact the domain owner.\"\n",
" raise SyftException(\n",
" public_message=\"An error occured executing the API call, please contact the domain owner.\"\n",
" )\n",
"\n",
"\n",
Expand Down Expand Up @@ -511,8 +514,6 @@
" return res\n",
"\n",
" request = context.user_client.code.request_code_execution(execute_query)\n",
" if isinstance(request, sy.SyftError):\n",
" return request\n",
" context.admin_client.requests.set_tags(request, [\"autosync\"])\n",
"\n",
" return (\n",
Expand Down Expand Up @@ -626,11 +627,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Test mock version for wrong queries\n",
"result = high_client.api.services.bigquery.test_query.mock(\n",
" sql_query=\"SELECT * FROM invalid_table LIMIT 1\"\n",
")\n",
"result"
"# todo can we clean up the duplicate exception messages?"
]
},
{
Expand All @@ -639,7 +636,13 @@
"metadata": {},
"outputs": [],
"source": [
"assert isinstance(result, sy.SyftError)"
"# Test mock version for wrong queries\n",
"with sy.raises(\n",
" sy.SyftException(public_message=\"*must be qualified with a dataset*\"), show=True\n",
"):\n",
" _ = high_client.api.services.bigquery.test_query.mock(\n",
" sql_query=\"SELECT * FROM invalid_table LIMIT 1\"\n",
" )"
]
},
{
Expand Down Expand Up @@ -675,6 +678,15 @@
"state"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# todo fix catch exceptions in code eval so that state can be written"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -753,11 +765,6 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
Expand All @@ -768,7 +775,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"version": "3.12.5"
}
},
"nbformat": 4,
Expand Down
37 changes: 12 additions & 25 deletions notebooks/scenarios/bigquery/03-ds-submit-request.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,10 @@
"metadata": {},
"outputs": [],
"source": [
"result = api_method()\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"assert isinstance(result, sy.SyftError)\n",
"assert \"waiting for approval\" in str(result)"
"with sy.raises(\n",
" sy.SyftException(public_message=\"*Your code is waiting for approval*\"), show=True\n",
"):\n",
" result = api_method()"
]
},
{
Expand All @@ -167,7 +159,7 @@
"source": [
"FUNC_NAME = \"large_sample\"\n",
"LARGE_SAMPLE_QUERY = (\n",
" f\"SELECT * FROM {test_settings.dataset_2}.{test_settings.table_2} LIMIT 1000000\"\n",
" f\"SELECT * FROM {test_settings.dataset_2}.{test_settings.table_2} LIMIT 10000\"\n",
")"
]
},
Expand Down Expand Up @@ -217,7 +209,10 @@
"metadata": {},
"outputs": [],
"source": [
"result = api_method_2()"
"with sy.raises(\n",
" sy.SyftException(public_message=\"*Your code is waiting for approval*\"), show=True\n",
"):\n",
" result = api_method_2()"
]
},
{
Expand All @@ -226,26 +221,18 @@
"metadata": {},
"outputs": [],
"source": [
"assert isinstance(result, sy.SyftError)\n",
"assert \"waiting for approval\" in str(result)"
"server.land()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"server.land()"
]
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
Expand All @@ -256,7 +243,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.5"
}
},
"nbformat": 4,
Expand Down
16 changes: 9 additions & 7 deletions notebooks/scenarios/bigquery/04-do-review-requests.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
"metadata": {},
"outputs": [],
"source": [
"assert len(result) == 1"
"assert len(result) == 10000"
]
},
{
Expand Down Expand Up @@ -243,14 +243,16 @@
"source": [
"server.land()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
Expand All @@ -261,7 +263,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.5"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 1f087e0

Please sign in to comment.