Skip to content

Commit

Permalink
Merge pull request #1063 from Codium-ai/tr/seed_and_tests
Browse files Browse the repository at this point in the history
Add end-to-end tests for GitHub, GitLab, and Bitbucket apps; added seed
  • Loading branch information
mrT23 authored Jul 28, 2024
2 parents 3a77652 + 38c38ec commit 5f5257d
Show file tree
Hide file tree
Showing 17 changed files with 401 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ jobs:
- id: test
name: Test dev docker
run: |
docker run --rm codiumai/pr-agent:test pytest -v
docker run --rm codiumai/pr-agent:test pytest -v tests/unittest
46 changes: 46 additions & 0 deletions .github/workflows/e2e_tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# End-to-end tests for the PR-Agent apps (GitHub app, GitLab webhook, Bitbucket app).
# Trigger manually via workflow_dispatch; the commented-out schedule would run nightly.
name: PR-Agent E2E tests

on:
  workflow_dispatch:
# schedule:
#   - cron: '0 0 * * *' # This cron expression runs the workflow every night at midnight UTC

jobs:
  pr_agent_job:
    runs-on: ubuntu-latest
    name: PR-Agent E2E GitHub App Test
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v2

      # Build the `test` stage of the project Dockerfile. The image is loaded into
      # the local Docker daemon (push: false, load: true) and reused by every test
      # step below; GHA layer caching keeps rebuilds fast.
      - id: build
        name: Build dev docker
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./docker/Dockerfile
          push: false
          load: true
          tags: codiumai/pr-agent:test
          cache-from: type=gha,scope=dev
          cache-to: type=gha,mode=max,scope=dev
          target: test

      # Each e2e test receives its provider credentials as environment variables;
      # the dotted names mirror the app's settings keys (e.g. the tests read
      # BITBUCKET.USERNAME via get_settings()).
      - id: test1
        name: E2E test github app
        run: |
          docker run -e GITHUB.USER_TOKEN=${{ secrets.TOKEN_GITHUB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_github_app.py
      - id: test2
        name: E2E gitlab webhook
        run: |
          docker run -e gitlab.PERSONAL_ACCESS_TOKEN=${{ secrets.TOKEN_GITLAB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_gitlab_webhook.py
      - id: test3
        name: E2E bitbucket app
        run: |
          docker run -e BITBUCKET.USERNAME=${{ secrets.BITBUCKET_USERNAME }} -e BITBUCKET.PASSWORD=${{ secrets.BITBUCKET_PASSWORD }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_bitbucket_app.py
7 changes: 7 additions & 0 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
"force_timeout": get_settings().config.ai_timeout,
"api_base": self.api_base,
}
seed = get_settings().config.get("seed", -1)
if temperature > 0 and seed >= 0:
raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0")
elif seed >= 0:
get_logger().info(f"Using fixed seed of {seed}")
kwargs["seed"] = seed

if self.repetition_penalty:
kwargs["repetition_penalty"] = self.repetition_penalty

Expand Down
7 changes: 7 additions & 0 deletions pr_agent/settings/configuration.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
[config]
# models
model="gpt-4-turbo-2024-04-09"
model_turbo="gpt-4o"
fallback_models=["gpt-4-0125-preview"]
# CLI
git_provider="github"
publish_output=true
publish_output_progress=true
verbosity_level=0 # 0,1,2
use_extra_bad_extensions=false
# Configurations
use_wiki_settings_file=true
use_repo_settings_file=true
use_global_settings_file=true
ai_timeout=120 # 2 minutes
# token limits
max_description_tokens = 500
max_commits_tokens = 500
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
Expand All @@ -22,6 +26,9 @@ ai_disclaimer="" # Pro feature, full text for the AI disclaimer
output_relevant_configurations=false
large_patch_policy = "clip" # "clip", "skip"
is_auto_command=false
# seed
seed=-1 # set to a non-negative value to fix the seed (requires temperature=0)
temperature=0.2

[pr_reviewer] # /review #
# enable/disable features
Expand Down
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_add_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ async def _get_prediction(self, model: str):
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
get_logger().info(f"\nUser prompt:\n{user_prompt}")
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

return response

Expand Down
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_code_suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,8 @@ async def _get_prediction(self, model: str, patches_diff: str) -> dict:
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)
user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

# load suggestions from the AI response
data = self._prepare_pr_code_suggestions(response)
Expand Down
2 changes: 1 addition & 1 deletion pr_agent/tools/pr_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_descri

response, finish_reason = await self.ai_handler.chat_completion(
model=model,
temperature=0.2,
temperature=get_settings().config.temperature,
system=system_prompt,
user=user_prompt
)
Expand Down
2 changes: 1 addition & 1 deletion pr_agent/tools/pr_generate_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ async def _get_prediction(self, model: str) -> str:

response, finish_reason = await self.ai_handler.chat_completion(
model=model,
temperature=0.2,
temperature=get_settings().config.temperature,
system=system_prompt,
user=user_prompt
)
Expand Down
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_information_from_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ async def _get_prediction(self, model: str):
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
get_logger().info(f"\nUser prompt:\n{user_prompt}")
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
return response

def _prepare_pr_answer(self) -> str:
Expand Down
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_line_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,6 @@ async def _get_prediction(self, model: str):
print(f"\nSystem prompt:\n{system_prompt}")
print(f"\nUser prompt:\n{user_prompt}")

response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
return response
10 changes: 5 additions & 5 deletions pr_agent/tools/pr_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ async def _get_prediction(self, model: str):
user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables)
if 'img_path' in variables:
img_path = self.vars['img_path']
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt,
img_path=img_path)
response, finish_reason = await (self.ai_handler.chat_completion
(model=model, temperature=get_settings().config.temperature,
system=system_prompt, user=user_prompt, img_path=img_path))
else:
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
return response

def _prepare_pr_answer(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion pr_agent/tools/pr_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ async def _get_prediction(self, model: str) -> str:

response, finish_reason = await self.ai_handler.chat_completion(
model=model,
temperature=0.2,
temperature=get_settings().config.temperature,
system=system_prompt,
user=user_prompt
)
Expand Down
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_update_changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ async def _get_prediction(self, model: str):
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.system).render(variables)
user_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.user).render(variables)
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model, system=system_prompt, user=user_prompt, temperature=get_settings().config.temperature)

return response

Expand Down
35 changes: 35 additions & 0 deletions tests/e2e_tests/e2e_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
FILE_PATH = "pr_agent/cli_pip.py"

PR_HEADER_START_WITH = '### **User description**\nupdate cli_pip.py\n\n\n___\n\n### **PR Type**'
REVIEW_START_WITH = '## PR Reviewer Guide 🔍\n\n<table>\n<tr><td>⏱️&nbsp;<strong>Estimated effort to review</strong>:'
IMPROVE_START_WITH_REGEX_PATTERN = r'^## PR Code Suggestions ✨\n\n<!-- [a-z0-9]+ -->\n\n<table><thead><tr><td>Category</td>'

NUM_MINUTES = 5

NEW_FILE_CONTENT = """\
from pr_agent import cli
from pr_agent.config_loader import get_settings
def main():
# Fill in the following values
provider = "github" # GitHub provider
user_token = "..." # GitHub user token
openai_key = "ghs_afsdfasdfsdf" # Example OpenAI key
pr_url = "..." # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'
command = "/improve" # Command to run (e.g. '/review', '/describe', 'improve', '/ask="What is the purpose of this PR?"')
# Setting the configurations
get_settings().set("CONFIG.git_provider", provider)
get_settings().set("openai.key", openai_key)
get_settings().set("github.user_token", user_token)
# Run the command. Feedback will appear in GitHub PR comments
output = cli.run_command(pr_url, command)
print(output)
if __name__ == '__main__':
main()
"""

100 changes: 100 additions & 0 deletions tests/e2e_tests/test_bitbucket_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import hashlib
import os
import re
import time
from datetime import datetime

import jwt
from atlassian.bitbucket import Cloud

import requests
from requests.auth import HTTPBasicAuth

from pr_agent.config_loader import get_settings
from pr_agent.log import setup_logger, get_logger
from tests.e2e_tests.e2e_utils import NEW_FILE_CONTENT, FILE_PATH, PR_HEADER_START_WITH, REVIEW_START_WITH, \
IMPROVE_START_WITH_REGEX_PATTERN, NUM_MINUTES


# Configure module logging; the level is taken from the LOG_LEVEL env var (default INFO).
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
logger = get_logger()

def test_e2e_run_bitbucket_app():
    """End-to-end test of the PR-Agent Bitbucket app.

    Creates a throwaway branch in the codiumai/pr-agent-tests repository,
    pushes an update to FILE_PATH, opens a pull request, then polls the PR
    comments (once a minute, up to NUM_MINUTES minutes) until the app has
    posted the expected feedback: a header, three code suggestions, and a
    review comment starting with '## PR Reviewer Guide'. The PR is declined
    and the branch deleted on both success and failure.

    Raises:
        AssertionError: if the expected comments never appear or the review
            comment is missing/invalid.
    """
    repo_slug = 'pr-agent-tests'
    project_key = 'codiumai'
    base_branch = "main"  # or any base branch you want
    new_branch = f"bitbucket_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    get_settings().config.git_provider = "bitbucket"

    # Track what was actually created so cleanup never references something
    # that does not exist. (Previously the except block unconditionally called
    # pr.decline()/repo.branches.delete(), which raised NameError and masked
    # the real failure whenever the error occurred before PR creation.)
    repo = None
    pr = None
    branch_created = False
    try:
        # Authenticate with HTTP Basic Auth using the app's settings keys.
        username = get_settings().get("BITBUCKET.USERNAME", None)
        password = get_settings().get("BITBUCKET.PASSWORD", None)
        session = requests.Session()
        session.auth = (username, password)
        bitbucket_client = Cloud(session=session)
        repo = bitbucket_client.workspaces.get(workspace=project_key).repositories.get(repo_slug)

        # Create a new branch from the base branch.
        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
        source_branch = repo.branches.get(base_branch)
        repo.branches.create(new_branch, source_branch.hash)
        branch_created = True

        # Push the updated file content to the new branch via the Bitbucket
        # REST API `src` endpoint (a multipart POST committing one file).
        url = f"https://api.bitbucket.org/2.0/repositories/{project_key}/{repo_slug}/src"
        files = {FILE_PATH: NEW_FILE_CONTENT}
        data = {
            "message": "update cli_pip.py",
            "branch": new_branch,
        }
        requests.request("POST", url, auth=HTTPBasicAuth(username, password), data=data, files=files)

        # Create a pull request; this is what triggers the app's feedback.
        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
        pr = repo.pullrequests.create(
            title=f'{new_branch}',
            description="update cli_pip.py",
            source_branch=new_branch,
            destination_branch=base_branch,
        )

        # Poll every minute, for NUM_MINUTES minutes, until the PR has all the tool results.
        for i in range(NUM_MINUTES):
            logger.info(f"Waiting for the PR to get all the tool results...")
            time.sleep(60)
            comments_raw = [c.raw for c in pr.comments()]
            if len(comments_raw) >= 5:  # header, 3 suggestions, 1 review
                if not any(raw.startswith('## PR Reviewer Guide 🔍') for raw in comments_raw):
                    logger.error(f"REVIEW feedback is invalid")
                    raise AssertionError("REVIEW feedback is invalid")
                break
            logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
        else:
            # for/else: the loop exhausted without breaking.
            raise AssertionError(f"After {NUM_MINUTES} minutes, the PR did not get all the tool results")

        logger.info(f"Succeeded in running e2e test for Bitbucket app on the PR")
    except Exception as e:
        logger.error(f"Failed to run e2e test for Bitbucket app: {e}")
        # Re-raise so pytest reports the real failure (the old `assert False`
        # discarded it, and assert statements vanish under `python -O`).
        raise
    finally:
        # Best-effort cleanup: decline the PR and delete the branch if created.
        try:
            if pr is not None:
                pr.decline()
            if branch_created:
                repo.branches.delete(new_branch)
        except Exception as cleanup_error:
            logger.error(f"Cleanup after Bitbucket e2e test failed: {cleanup_error}")


# Allow running the e2e test directly as a script, outside pytest.
if __name__ == '__main__':
    test_e2e_run_bitbucket_app()
Loading

0 comments on commit 5f5257d

Please sign in to comment.