diff --git a/train_and_deploy/configs/deployer_config.yaml b/train_and_deploy/configs/deployer_config.yaml
index c24231b2..cd575ec2 100644
--- a/train_and_deploy/configs/deployer_config.yaml
+++ b/train_and_deploy/configs/deployer_config.yaml
@@ -40,3 +40,6 @@ model:
 
 extra:
   notify_on_failure: True
+
+parameters:
+  target_env: staging
diff --git a/train_and_deploy/configs/inference_config.yaml b/train_and_deploy/configs/inference_config.yaml
index d6f1be75..1a46bd01 100644
--- a/train_and_deploy/configs/inference_config.yaml
+++ b/train_and_deploy/configs/inference_config.yaml
@@ -39,3 +39,5 @@ model:
 
 extra:
   notify_on_failure: True
+parameters:
+  target_env: staging
\ No newline at end of file
diff --git a/train_and_deploy/pipelines/batch_inference.py b/train_and_deploy/pipelines/batch_inference.py
index 3b7f5015..08d8d0fb 100644
--- a/train_and_deploy/pipelines/batch_inference.py
+++ b/train_and_deploy/pipelines/batch_inference.py
@@ -33,7 +33,9 @@
 
 
 @pipeline(on_failure=notify_on_failure)
-def gitguarden_batch_inference():
+def gitguarden_batch_inference(
+    target_env: str,
+):
     """
     Model batch inference pipeline.
 
@@ -66,6 +68,7 @@ def gitguarden_batch_inference():
     ########## Inference stage ##########
     inference_predict(
         dataset_inf=df_inference,
+        target_env=target_env,
         after=["drift_quality_gate"],
     )
 
diff --git a/train_and_deploy/pipelines/local_deployment.py b/train_and_deploy/pipelines/local_deployment.py
index 0f0fffa6..dd3a4f36 100644
--- a/train_and_deploy/pipelines/local_deployment.py
+++ b/train_and_deploy/pipelines/local_deployment.py
@@ -21,9 +21,10 @@
 
 
 @pipeline(on_failure=notify_on_failure, enable_cache=False)
-def gitguarden_local_deployment():
-    """
-    Model deployment pipeline.
+def gitguarden_local_deployment(
+    target_env: str,
+):
+    """Model deployment pipeline.
 
     This pipeline deploys a trained model for future inference.
     """
@@ -33,7 +34,7 @@ def gitguarden_local_deployment():
     ########## Deployment stage ##########
     # Get the production model artifact
     bento = bento_builder()
-    deployment_deploy(bento=bento)
+    deployment_deploy(bento=bento, target_env=target_env)
 
     notify_on_success(after=["deployment_deploy"])
     ### YOUR CODE ENDS HERE ###
diff --git a/train_and_deploy/service.py b/train_and_deploy/service.py
index b43c0d07..83170eab 100644
--- a/train_and_deploy/service.py
+++ b/train_and_deploy/service.py
@@ -11,4 +11,4 @@
 
 @svc.api(input=input_spec, output=NumpyNdarray())
 async def predict(input_arr):
-    return await gitguarden_runner.predict.async_run(input_arr)
+    return await gitguarden_runner.predict.async_run(input_arr)
\ No newline at end of file
diff --git a/train_and_deploy/steps/deployment/deployment_deploy.py b/train_and_deploy/steps/deployment/deployment_deploy.py
index 5fbe1144..3cd109c9 100644
--- a/train_and_deploy/steps/deployment/deployment_deploy.py
+++ b/train_and_deploy/steps/deployment/deployment_deploy.py
@@ -1,100 +1,60 @@
-# Apache Software License 2.0
+# Copyright (c) ZenML GmbH 2022. All Rights Reserved.
 #
-# Copyright (c) ZenML GmbH 2024. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# https://www.apache.org/licenses/LICENSE-2.0
 #
-
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
 from typing import Optional
 
 from bentoml._internal.bento import bento
-from typing_extensions import Annotated
-from zenml import (
-    ArtifactConfig,
-    Model,
-    get_step_context,
-    log_artifact_metadata,
-    step,
-)
+from zenml import get_step_context, step
 from zenml.client import Client
-from zenml.integrations.bentoml.services.bentoml_container_deployment import (
-    BentoMLContainerDeploymentService,
+from zenml.integrations.bentoml.services.bentoml_local_deployment import (
+    BentoMLLocalDeploymentConfig,
+    BentoMLLocalDeploymentService,
 )
-from zenml.integrations.bentoml.services.deployment_type import (
-    BentoMLDeploymentType,
-)
-from zenml.integrations.bentoml.steps import bentoml_model_deployer_step
 from zenml.logger import get_logger
+from zenml.utils import source_utils
 
 logger = get_logger(__name__)
 
+
 @step
 def deployment_deploy(
     bento: bento.Bento,
-) -> (
-    Annotated[
-        Optional[BentoMLContainerDeploymentService],
-        ArtifactConfig(name="bentoml_deployment", is_deployment_artifact=True),
-    ]
-):
-    """Predictions step.
-
-    This is an example of a predictions step that takes the data in and returns
-    predicted values.
-
-    This step is parameterized, which allows you to configure the step
-    independently of the step code, before running it in a pipeline.
-    In this example, the step can be configured to use different input data.
-    See the documentation for more information:
-
-        https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines
-
-    Args:
-        dataset_inf: The inference dataset.
-
-    Returns:
-        The predictions as pandas series
-    """
-    ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
-    if Client().active_stack.orchestrator.flavor == "local":
-        model = get_step_context().model
-
-        # deploy predictor service
-        bentoml_deployment = bentoml_model_deployer_step.entrypoint(
-            model_name=model.name,  # Name of the model
-            port=3009,  # Port to be used by the http server
-            production=True,  # Deploy the model in production mode
-            timeout=1000,
-            bento=bento,
-            deployment_type=BentoMLDeploymentType.CONTAINER,
-        )
-
-        bentoml_service = Client().get_service(name_id_or_prefix=bentoml_deployment.uuid)
-
-        log_artifact_metadata(
-            metadata={
-                "service_type": "bentoml",
-                "status": bentoml_service.state,
-                "prediction_url": bentoml_service.prediction_url,
-                "health_check_url": bentoml_service.health_check_url,
-                "model_uri": model.get_artifact(name="model").uri,
-                "bento_tag" : bentoml_service.config.get("bento_tag"),
-                "bentoml_model_image": bentoml_service.config.get("image"),
-            }
-        )
-    else:
-        logger.warning("Skipping deployment as the orchestrator is not local.")
-        bentoml_deployment = None
-    ### YOUR CODE ENDS HERE ###
-    return bentoml_deployment
\ No newline at end of file
+    target_env: str,
+) -> Optional[BentoMLLocalDeploymentService]:
+    """Deploy the given bento as a local BentoML deployment service."""
+    zenml_client = Client()
+    step_context = get_step_context()
+    pipeline_name = step_context.pipeline.name
+    step_name = step_context.step_run.name
+    model_deployer = zenml_client.active_stack.model_deployer
+    bentoml_deployment_config = BentoMLLocalDeploymentConfig(
+        model_name=step_context.model.name,
+        model_version=target_env,
+        description="An example of deploying a model using the BentoML Model Deployer",
+        pipeline_name=pipeline_name,
+        pipeline_step_name=step_name,
+        model_uri=bento.info.labels.get("model_uri"),
+        bento_tag=str(bento.tag),
+        bento_uri=bento.info.labels.get("bento_uri"),
+        working_dir=source_utils.get_source_root(),
+        timeout=1500,
+    )
+    service = model_deployer.deploy_model(
+        config=bentoml_deployment_config,
+        service_type=BentoMLLocalDeploymentService.SERVICE_TYPE,
+    )
+    logger.info(
+        f"The deployed service info: {model_deployer.get_model_server_info(service)}"
+    )
+    return service
diff --git a/train_and_deploy/steps/inference/inference_predict.py b/train_and_deploy/steps/inference/inference_predict.py
index 99077df1..76bb017f 100644
--- a/train_and_deploy/steps/inference/inference_predict.py
+++ b/train_and_deploy/steps/inference/inference_predict.py
@@ -16,13 +16,13 @@
 #
 
-from typing import Optional
-
+from typing import Optional, cast
+from zenml.client import Client
 import pandas as pd
 from typing_extensions import Annotated
 from zenml import get_step_context, step
-from zenml.integrations.bentoml.services.bentoml_container_deployment import (
-    BentoMLContainerDeploymentService,
+from zenml.integrations.bentoml.services.bentoml_local_deployment import (
+    BentoMLLocalDeploymentService,
 )
 from zenml.logger import get_logger
 
@@ -32,6 +32,7 @@
 
 @step
 def inference_predict(
     dataset_inf: pd.DataFrame,
+    target_env: str,
 ) -> Annotated[pd.Series, "predictions"]:
     """Predictions step.
@@ -55,12 +56,18 @@ def inference_predict(
     model = get_step_context().model
 
     # get predictor
-    predictor_service: Optional[BentoMLContainerDeploymentService] = model.load_artifact(
-        "bentomldeployment"
+    zenml_client = Client()
+    model_deployer = zenml_client.active_stack.model_deployer
+
+    # fetch an existing deployment service for this model name and target environment
+    existing_services = model_deployer.find_model_server(
+        model_name=model.name,
+        model_version=target_env,
     )
+    predictor_service = cast(BentoMLLocalDeploymentService, existing_services[0]) if existing_services else None
     if predictor_service is not None:
         # run prediction from service
-        predictions = predictor_service.predict(api_endpoint="predict", data=dataset_inf)
+        predictions = predictor_service.predict(api_endpoint="predict", data=dataset_inf)
     else:
         logger.warning(
             "Predicting from loaded model instead of deployment service "
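
Usage note: the new `parameters:` block in the run configs is what feeds the `target_env` argument into the pipeline signatures above. A minimal launcher sketch, assuming ZenML's standard `with_options(config_path=...)` mechanism and the file layout in this diff; the `run_deployment.py` name itself is hypothetical and not part of this change:

    # run_deployment.py -- illustrative sketch only, not part of this diff.
    from pipelines.local_deployment import gitguarden_local_deployment

    if __name__ == "__main__":
        # with_options reads deployer_config.yaml; its `parameters:` block
        # supplies target_env=staging to the pipeline entrypoint, so the
        # environment can be switched per run without touching code.
        gitguarden_local_deployment.with_options(
            config_path="configs/deployer_config.yaml",
        )()

Overriding `target_env` (for example to `production`) is then a config-only change, which is the point of threading the parameter through `deployment_deploy` and `inference_predict` rather than hard-coding it in the steps.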