From 0a59ddfc4367c6581d64626621803e7a060961b1 Mon Sep 17 00:00:00 2001
From: Ivan Nardini <88703814+inardini@users.noreply.github.com>
Date: Tue, 17 Sep 2024 14:38:05 +0200
Subject: [PATCH] feat: update embeddings tuning notebook (#1096)

# Description

Thank you for opening a Pull Request! Before submitting your PR, there are a
few things you can do to make sure it goes smoothly:

- [x] Follow the [`CONTRIBUTING` Guide](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/CONTRIBUTING.md).
- [x] You are listed as the author in your notebook or README file.
- [x] Your account is listed in [`CODEOWNERS`](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/.github/CODEOWNERS) for the file(s).
- [x] Make your Pull Request title in the [Conventional Commits](https://www.conventionalcommits.org/) specification.
- [x] Ensure the tests and linter pass (Run `nox -s format` from the repository root to format).
- [x] Appropriate docs were updated (if necessary)

---------

Co-authored-by: Owl Bot
---
 embeddings/intro_embeddings_tuning.ipynb | 36 +++++++++++++-----------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/embeddings/intro_embeddings_tuning.ipynb b/embeddings/intro_embeddings_tuning.ipynb
index 467a5a2206e..ac6646abefe 100644
--- a/embeddings/intro_embeddings_tuning.ipynb
+++ b/embeddings/intro_embeddings_tuning.ipynb
@@ -156,8 +156,8 @@
    },
    "outputs": [],
    "source": [
-    "! pip3 install --upgrade --user google-cloud-aiplatform==1.48.0 google-cloud-documentai==2.26.0 google-cloud-documentai-toolbox==0.13.3a0\n",
-    "! pip3 install --upgrade --user langchain==0.1.16 langchain-core==0.1.44 langchain-text-splitters==0.0.1 langchain-google-community==1.0.2 gcsfs==2024.3.1 etils==1.7.0"
+    "! pip3 install --upgrade --user google-cloud-aiplatform google-cloud-documentai google-cloud-documentai-toolbox --quiet\n",
+    "! pip3 install --upgrade --user langchain langchain-core langchain-text-splitters langchain-google-community gcsfs etils --quiet"
    ]
   },
   {
@@ -402,10 +402,9 @@
     "from google.api_core.client_options import ClientOptions\n",
     "from google.cloud import aiplatform, documentai\n",
     "from google.protobuf.json_format import MessageToDict\n",
-    "from langchain_community.document_loaders.blob_loaders import Blob\n",
-    "from langchain_community.document_loaders.parsers import DocAIParser\n",
     "import langchain_core\n",
     "from langchain_core.documents.base import Document\n",
+    "from langchain_google_community.docai import Blob, DocAIParser\n",
     "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "import numpy as np\n",
     "import pandas as pd\n",
@@ -517,11 +516,11 @@
     "    }\n",
     "\n",
     "    prompt_template = \"\"\"\n",
-    "    You are an examinator. Your task is to create one QUESTION for an exam using only.\n",
+    "    You are an examinator. Your task is to create one QUESTION for an exam using only.\n",
     "\n",
-    " \n",
+    " \n",
     "    {chunk}\n",
-    " \n",
+    " \n",
     "\n",
     "    QUESTION:\n",
     "    \"\"\"\n",
@@ -568,7 +567,7 @@
     "    \"\"\"Get uploaded model from the pipeline job\"\"\"\n",
     "    evaluation_task = get_task_by_name(job, task_name)\n",
     "    upload_metadata = MessageToDict(evaluation_task.execution._pb)[\"metadata\"]\n",
-    "    return vertexai.Model(upload_metadata[\"output:model_resource_name\"])\n",
+    "    return aiplatform.Model(upload_metadata[\"output:model_resource_name\"])\n",
     "\n",
     "\n",
     "def get_training_output_dir(\n",
@@ -724,7 +723,7 @@
    },
    "outputs": [],
    "source": [
-    "blob = Blob(\n",
+    "blob = Blob.from_path(\n",
     "    path=f\"{RAW_DATA_URI}/goog-10-k-2023.pdf\",\n",
     ")\n",
     "\n",
@@ -1382,28 +1381,33 @@
    },
    "outputs": [],
    "source": [
-    "import os\n",
+    "import shutil\n",
     "\n",
     "delete_endpoint = False\n",
     "delete_model = False\n",
     "delete_job = False\n",
     "delete_bucket = False\n",
+    "delete_tutorial = False\n",
     "\n",
     "# Delete endpoint resource\n",
-    "if delete_endpoint or os.getenv(\"IS_TESTING\"):\n",
-    "    endpoint.delete()\n",
+    "if delete_endpoint:\n",
+    "    endpoint.delete(force=True)\n",
     "\n",
     "# Delete model resource\n",
-    "if delete_model or os.getenv(\"IS_TESTING\"):\n",
+    "if delete_model:\n",
     "    model.delete()\n",
     "\n",
     "# Delete pipeline job\n",
-    "if delete_job or os.getenv(\"IS_TESTING\"):\n",
+    "if delete_job:\n",
     "    job.delete()\n",
     "\n",
     "# Delete Cloud Storage objects that were created\n",
-    "if delete_bucket or os.getenv(\"IS_TESTING\"):\n",
-    "    ! gsutil -m rm -r $BUCKET_URI"
+    "if delete_bucket:\n",
+    "    ! gsutil -m rm -r $BUCKET_URI\n",
+    "\n",
+    "# Delete tutorial folder\n",
+    "if delete_tutorial:\n",
+    "    shutil.rmtree(str(tutorial_path))"
    ]
   }
 ],
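For reviewers who want to exercise the re-homed imports locally, here is a minimal sketch of how the pieces changed by this patch fit together. Only the `from langchain_google_community.docai import Blob, DocAIParser` line and the switch to `Blob.from_path(...)` come from the diff; the `DocAIParser` constructor arguments and `lazy_parse` are the library's public API, and `PROJECT_ID`, `PROCESSOR_ID`, and the `gs://` URIs are placeholders, not values from the notebook.

```python
# Sketch only (not part of the notebook): new import path and Blob.from_path
# introduced by this patch. PROJECT_ID, PROCESSOR_ID, and the gs:// URIs are
# placeholders; an existing Document AI OCR processor is assumed.
from langchain_google_community.docai import Blob, DocAIParser

parser = DocAIParser(
    location="us",  # region of the Document AI processor
    processor_name="projects/PROJECT_ID/locations/us/processors/PROCESSOR_ID",
    gcs_output_path="gs://BUCKET_NAME/docai_output",  # batch results land here
)

# Blob.from_path replaces the direct Blob(...) construction used before this change.
blob = Blob.from_path(path="gs://BUCKET_NAME/raw/goog-10-k-2023.pdf")

# lazy_parse runs a Document AI batch job on the blob and yields LangChain Documents.
documents = list(parser.lazy_parse(blob))
print(f"Parsed {len(documents)} page documents")
```

Because the patch also unpins the `pip` installs, this sketch assumes current releases of `langchain-google-community` and `google-cloud-documentai` rather than the previously pinned versions.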