diff --git a/libs/vertexai/langchain_google_vertexai/embeddings.py b/libs/vertexai/langchain_google_vertexai/embeddings.py
index 3bf658f4..1d0ff524 100644
--- a/libs/vertexai/langchain_google_vertexai/embeddings.py
+++ b/libs/vertexai/langchain_google_vertexai/embeddings.py
@@ -28,6 +28,7 @@
 )
 
 from langchain_google_vertexai._base import _VertexAICommon
+from langchain_google_vertexai._image_utils import ImageBytesLoader
 from langchain_google_vertexai._utils import get_user_agent
 
 logger = logging.getLogger(__name__)
@@ -396,12 +397,15 @@ def embed_query(self, text: str) -> List[float]:
         """
         return self.embed([text], 1, "RETRIEVAL_QUERY")[0]
 
-    def embed_image(self, image_path: str) -> List[float]:
+    def embed_image(
+        self, image_path: str, contextual_text: Optional[str] = None
+    ) -> List[float]:
         """Embed an image.
 
         Args:
-            image_path: Path to image (local or Google Cloud Storage) to generate
+            image_path: Path to image (local, Google Cloud Storage or web) to generate
                 embeddings for.
+            contextual_text: Optional text to embed along with the image.
 
         Returns:
             Embedding for the image.
@@ -409,8 +413,10 @@ def embed_image(self, image_path: str) -> List[float]:
         if self.model_type != GoogleEmbeddingModelType.MULTIMODAL:
             raise NotImplementedError("Only supported for multimodal models")
 
-        image = Image.load_from_file(image_path)
+        image_loader = ImageBytesLoader()
+        bytes_image = image_loader.load_bytes(image_path)
+        image = Image(bytes_image)
         result: MultiModalEmbeddingResponse = self.instance[
             "get_embeddings_with_retry"
-        ](image=image)
+        ](image=image, contextual_text=contextual_text)
         return result.image_embedding
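
Usage sketch (illustrative, not part of the diff): with this change, embed_image accepts any image source that ImageBytesLoader can resolve (a local path, a GCS URI, or a web URL) plus optional contextual text. The model name, bucket path, and text below are assumptions for demonstration only.

    from langchain_google_vertexai import VertexAIEmbeddings

    # Assumed multimodal embedding model name; any multimodal model works here,
    # since embed_image raises NotImplementedError for text-only models.
    embeddings = VertexAIEmbeddings(model_name="multimodalembedding@001")

    # The image source may be a local path, a gs:// URI, or an http(s) URL,
    # because ImageBytesLoader resolves all three to raw bytes.
    vector = embeddings.embed_image(
        "gs://my-bucket/dog.png",            # hypothetical GCS object
        contextual_text="A photo of a dog",  # optional text passed to the model
    )
    print(len(vector))  # a single image embedding vector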