diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index 3bf08cb5c..bfe87dbdd 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -36,6 +36,6 @@ jobs:
       - id: test
         name: Test dev docker
         run: |
-          docker run --rm codiumai/pr-agent:test pytest -v
+          docker run --rm codiumai/pr-agent:test pytest -v tests/unittest
diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml
new file mode 100644
index 000000000..e49bcea39
--- /dev/null
+++ b/.github/workflows/e2e_tests.yaml
@@ -0,0 +1,46 @@
+name: PR-Agent E2E tests
+
+on:
+  workflow_dispatch:
+#  schedule:
+#    - cron: '0 0 * * *' # This cron expression runs the workflow every night at midnight UTC
+
+jobs:
+  pr_agent_job:
+    runs-on: ubuntu-latest
+    name: PR-Agent E2E GitHub App Test
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Setup Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - id: build
+        name: Build dev docker
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: ./docker/Dockerfile
+          push: false
+          load: true
+          tags: codiumai/pr-agent:test
+          cache-from: type=gha,scope=dev
+          cache-to: type=gha,mode=max,scope=dev
+          target: test
+
+      - id: test1
+        name: E2E test github app
+        run: |
+          docker run -e GITHUB.USER_TOKEN=${{ secrets.TOKEN_GITHUB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_github_app.py
+
+      - id: test2
+        name: E2E gitlab webhook
+        run: |
+          docker run -e gitlab.PERSONAL_ACCESS_TOKEN=${{ secrets.TOKEN_GITLAB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_gitlab_webhook.py
+
+
+      - id: test3
+        name: E2E bitbucket app
+        run: |
+          docker run -e BITBUCKET.USERNAME=${{ secrets.BITBUCKET_USERNAME }} -e BITBUCKET.PASSWORD=${{ secrets.BITBUCKET_PASSWORD }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_bitbucket_app.py
\ No newline at end of file
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index f857e34e2..89dca60fa 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -129,6 +129,13 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             "force_timeout": get_settings().config.ai_timeout,
             "api_base": self.api_base,
         }
+        seed = get_settings().config.get("seed", -1)
+        if temperature > 0 and seed >= 0:
+            raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0")
+        elif seed >= 0:
+            get_logger().info(f"Using fixed seed of {seed}")
+            kwargs["seed"] = seed
+
         if self.repetition_penalty:
             kwargs["repetition_penalty"] = self.repetition_penalty
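A quick illustration of the seed guard added above, as a standalone sketch. The `build_completion_kwargs` helper is hypothetical (not part of pr-agent); it only mirrors the control flow of the hunk, where the resulting kwargs are passed on to litellm:

```python
from typing import Any, Dict

def build_completion_kwargs(temperature: float, seed: int = -1) -> Dict[str, Any]:
    """Mirror of the guard in litellm_ai_handler.py: a fixed seed is only
    honored when sampling is deterministic (temperature == 0)."""
    kwargs: Dict[str, Any] = {}
    if temperature > 0 and seed >= 0:
        raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0")
    elif seed >= 0:
        kwargs["seed"] = seed  # forwarded with the rest of the completion kwargs
    return kwargs

assert build_completion_kwargs(temperature=0.0, seed=42) == {"seed": 42}
assert build_completion_kwargs(temperature=0.2) == {}  # default seed=-1 means "no seed"
```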
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 7e9d77e1e..e0a10a179 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -1,16 +1,20 @@
 [config]
+# models
 model="gpt-4-turbo-2024-04-09"
 model_turbo="gpt-4o"
 fallback_models=["gpt-4-0125-preview"]
+# CLI
 git_provider="github"
 publish_output=true
 publish_output_progress=true
 verbosity_level=0 # 0,1,2
 use_extra_bad_extensions=false
+# Configurations
 use_wiki_settings_file=true
 use_repo_settings_file=true
 use_global_settings_file=true
 ai_timeout=120 # 2minutes
+# token limits
 max_description_tokens = 500
 max_commits_tokens = 500
 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
@@ -22,6 +26,9 @@ ai_disclaimer="" # Pro feature, full text for the AI disclaimer
 output_relevant_configurations=false
 large_patch_policy = "clip" # "clip", "skip"
 is_auto_command=false
+# seed
+seed=-1 # set a non-negative value to fix the seed (requires temperature=0)
+temperature=0.2

 [pr_reviewer] # /review #
 # enable/disable features
diff --git a/pr_agent/tools/pr_add_docs.py b/pr_agent/tools/pr_add_docs.py
index 52d60ef94..3ec97b31c 100644
--- a/pr_agent/tools/pr_add_docs.py
+++ b/pr_agent/tools/pr_add_docs.py
@@ -89,8 +89,8 @@ async def _get_prediction(self, model: str):
         if get_settings().config.verbosity_level >= 2:
             get_logger().info(f"\nSystem prompt:\n{system_prompt}")
             get_logger().info(f"\nUser prompt:\n{user_prompt}")
-        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                        system=system_prompt, user=user_prompt)
+        response, finish_reason = await self.ai_handler.chat_completion(
+            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

         return response
diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py
index ddad36c64..84e3cc8f1 100644
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@@ -304,8 +304,8 @@ async def _get_prediction(self, model: str, patches_diff: str) -> dict:
         environment = Environment(undefined=StrictUndefined)
         system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)
         user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
-        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                        system=system_prompt, user=user_prompt)
+        response, finish_reason = await self.ai_handler.chat_completion(
+            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

         # load suggestions from the AI response
         data = self._prepare_pr_code_suggestions(response)
diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py
index 4345d7fc5..bf8c7205d 100644
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@@ -325,7 +325,7 @@ async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_descri
         response, finish_reason = await self.ai_handler.chat_completion(
             model=model,
-            temperature=0.2,
+            temperature=get_settings().config.temperature,
             system=system_prompt,
             user=user_prompt
         )
diff --git a/pr_agent/tools/pr_generate_labels.py b/pr_agent/tools/pr_generate_labels.py
index 436947220..2657a857b 100644
--- a/pr_agent/tools/pr_generate_labels.py
+++ b/pr_agent/tools/pr_generate_labels.py
@@ -142,7 +142,7 @@ async def _get_prediction(self, model: str) -> str:
         response, finish_reason = await self.ai_handler.chat_completion(
             model=model,
-            temperature=0.2,
+            temperature=get_settings().config.temperature,
             system=system_prompt,
             user=user_prompt
         )
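For reference, a minimal sketch of how the two new keys introduced in configuration.toml above might be pinned programmatically, using the same `get_settings().set` pattern the repo's own `cli_pip.py` fixture uses further below:

```python
from pr_agent.config_loader import get_settings

# Reproducible runs: a fixed seed requires deterministic sampling,
# so temperature must be 0 alongside it (see the handler guard above).
get_settings().set("config.temperature", 0.0)
get_settings().set("config.seed", 42)
```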
diff --git a/pr_agent/tools/pr_information_from_user.py b/pr_agent/tools/pr_information_from_user.py
index 035371818..e5bd2f727 100644
--- a/pr_agent/tools/pr_information_from_user.py
+++ b/pr_agent/tools/pr_information_from_user.py
@@ -66,8 +66,8 @@ async def _get_prediction(self, model: str):
         if get_settings().config.verbosity_level >= 2:
             get_logger().info(f"\nSystem prompt:\n{system_prompt}")
             get_logger().info(f"\nUser prompt:\n{user_prompt}")
-        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                        system=system_prompt, user=user_prompt)
+        response, finish_reason = await self.ai_handler.chat_completion(
+            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
         return response

     def _prepare_pr_answer(self) -> str:
diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py
index e26e06d59..56818e1cd 100644
--- a/pr_agent/tools/pr_line_questions.py
+++ b/pr_agent/tools/pr_line_questions.py
@@ -102,6 +102,6 @@ async def _get_prediction(self, model: str):
             print(f"\nSystem prompt:\n{system_prompt}")
             print(f"\nUser prompt:\n{user_prompt}")

-        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                        system=system_prompt, user=user_prompt)
+        response, finish_reason = await self.ai_handler.chat_completion(
+            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
         return response
diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py
index 38efa13fe..8112510ea 100644
--- a/pr_agent/tools/pr_questions.py
+++ b/pr_agent/tools/pr_questions.py
@@ -108,12 +108,12 @@ async def _get_prediction(self, model: str):
         user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables)
         if 'img_path' in variables:
             img_path = self.vars['img_path']
-            response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                            system=system_prompt, user=user_prompt,
-                                                                            img_path=img_path)
+            response, finish_reason = await self.ai_handler.chat_completion(
+                model=model, temperature=get_settings().config.temperature,
+                system=system_prompt, user=user_prompt, img_path=img_path)
         else:
-            response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                            system=system_prompt, user=user_prompt)
+            response, finish_reason = await self.ai_handler.chat_completion(
+                model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
         return response

     def _prepare_pr_answer(self) -> str:
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index defc3696e..c689c9c9f 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -180,7 +180,7 @@ async def _get_prediction(self, model: str) -> str:
         response, finish_reason = await self.ai_handler.chat_completion(
             model=model,
-            temperature=0.2,
+            temperature=get_settings().config.temperature,
             system=system_prompt,
             user=user_prompt
         )
diff --git a/pr_agent/tools/pr_update_changelog.py b/pr_agent/tools/pr_update_changelog.py
index 839d18c1d..e937beeb5 100644
--- a/pr_agent/tools/pr_update_changelog.py
+++ b/pr_agent/tools/pr_update_changelog.py
@@ -103,8 +103,8 @@ async def _get_prediction(self, model: str):
         environment = Environment(undefined=StrictUndefined)
         system_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.system).render(variables)
         user_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.user).render(variables)
-        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                        system=system_prompt, user=user_prompt)
+        response, finish_reason = await self.ai_handler.chat_completion(
+            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

         return response
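All of the `_get_prediction` call sites above now share one shape: the hard-coded `temperature=0.2` is replaced by the single config key. A toy, self-contained rendering of that pattern (the handler class here is a stub, not pr-agent's `LiteLLMAIHandler`):

```python
import asyncio

class StubAIHandler:
    """Stand-in with the same chat_completion signature the tools rely on."""
    async def chat_completion(self, model: str, system: str, user: str, temperature: float):
        return f"[t={temperature}] {user}", "stop"

async def get_prediction(handler, model: str, system_prompt: str, user_prompt: str, temperature: float):
    # was: temperature=0.2 hard-coded at every call site
    response, finish_reason = await handler.chat_completion(
        model=model, temperature=temperature, system=system_prompt, user=user_prompt)
    return response

print(asyncio.run(get_prediction(StubAIHandler(), "gpt-4o", "sys", "hello", 0.2)))
```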
diff --git a/tests/e2e_tests/e2e_utils.py b/tests/e2e_tests/e2e_utils.py
new file mode 100644
index 000000000..5251d83e4
--- /dev/null
+++ b/tests/e2e_tests/e2e_utils.py
@@ -0,0 +1,35 @@
+FILE_PATH = "pr_agent/cli_pip.py"
+
+PR_HEADER_START_WITH = '### **User description**\nupdate cli_pip.py\n\n\n___\n\n### **PR Type**'
+REVIEW_START_WITH = '## PR Reviewer Guide 🔍\n\n\n⏱️ Estimated effort to review:'
+IMPROVE_START_WITH_REGEX_PATTERN = r'^## PR Code Suggestions ✨\n\n\n\n'
+
+NUM_MINUTES = 5
+
+NEW_FILE_CONTENT = """\
+from pr_agent import cli
+from pr_agent.config_loader import get_settings
+
+
+def main():
+    # Fill in the following values
+    provider = "github"  # GitHub provider
+    user_token = "..."  # GitHub user token
+    openai_key = "ghs_afsdfasdfsdf"  # Example OpenAI key
+    pr_url = "..."  # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'
+    command = "/improve"  # Command to run (e.g. '/review', '/describe', 'improve', '/ask="What is the purpose of this PR?"')
+
+    # Setting the configurations
+    get_settings().set("CONFIG.git_provider", provider)
+    get_settings().set("openai.key", openai_key)
+    get_settings().set("github.user_token", user_token)
+
+    # Run the command. Feedback will appear in GitHub PR comments
+    output = cli.run_command(pr_url, command)
+
+    print(output)
+
+if __name__ == '__main__':
+    main()
+"""
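The constants above are prefix markers the tests match against tool output. A small sketch of how they are consumed (the sample bodies are made up):

```python
import re

PR_HEADER_START_WITH = '### **User description**\nupdate cli_pip.py\n\n\n___\n\n### **PR Type**'
IMPROVE_START_WITH_REGEX_PATTERN = r'^## PR Code Suggestions ✨\n\n\n\n'

sample_header = PR_HEADER_START_WITH + "\nEnhancement\n"          # hypothetical /describe output
sample_improve = "## PR Code Suggestions ✨\n\n\n\n| suggestion |"  # hypothetical /improve output

assert sample_header.startswith(PR_HEADER_START_WITH)
assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, sample_improve)
```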
diff --git a/tests/e2e_tests/test_bitbucket_app.py b/tests/e2e_tests/test_bitbucket_app.py
new file mode 100644
index 000000000..47b9ca4f1
--- /dev/null
+++ b/tests/e2e_tests/test_bitbucket_app.py
@@ -0,0 +1,100 @@
+import hashlib
+import os
+import re
+import time
+from datetime import datetime
+
+import jwt
+from atlassian.bitbucket import Cloud
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+from pr_agent.config_loader import get_settings
+from pr_agent.log import setup_logger, get_logger
+from tests.e2e_tests.e2e_utils import NEW_FILE_CONTENT, FILE_PATH, PR_HEADER_START_WITH, REVIEW_START_WITH, \
+    IMPROVE_START_WITH_REGEX_PATTERN, NUM_MINUTES
+
+
+log_level = os.environ.get("LOG_LEVEL", "INFO")
+setup_logger(log_level)
+logger = get_logger()
+
+def test_e2e_run_bitbucket_app():
+    repo_slug = 'pr-agent-tests'
+    project_key = 'codiumai'
+    base_branch = "main"  # or any base branch you want
+    new_branch = f"bitbucket_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
+    get_settings().config.git_provider = "bitbucket"
+
+    try:
+        # Add username and password for authentication
+        username = get_settings().get("BITBUCKET.USERNAME", None)
+        password = get_settings().get("BITBUCKET.PASSWORD", None)
+        s = requests.Session()
+        s.auth = (username, password)  # Use HTTP Basic Auth
+        bitbucket_client = Cloud(session=s)
+        repo = bitbucket_client.workspaces.get(workspace=project_key).repositories.get(repo_slug)
+
+        # Create a new branch from the base branch
+        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
+        source_branch = repo.branches.get(base_branch)
+        target_repo = repo.branches.create(new_branch, source_branch.hash)
+
+        # Update the file content
+        url = f"https://api.bitbucket.org/2.0/repositories/{project_key}/{repo_slug}/src"
+        files = {FILE_PATH: NEW_FILE_CONTENT}
+        data = {
+            "message": "update cli_pip.py",
+            "branch": new_branch,
+        }
+        requests.request("POST", url, auth=HTTPBasicAuth(username, password), data=data, files=files)
+
+
+        # Create a pull request
+        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
+        pr = repo.pullrequests.create(
+            title=f'{new_branch}',
+            description="update cli_pip.py",
+            source_branch=new_branch,
+            destination_branch=base_branch
+        )
+
+        # check every 1 minute, for 5 minutes, if the PR has all the tool results
+        for i in range(NUM_MINUTES):
+            logger.info(f"Waiting for the PR to get all the tool results...")
+            time.sleep(60)
+            comments = list(pr.comments())
+            comments_raw = [c.raw for c in comments]
+            if len(comments) >= 5:  # header, 3 suggestions, 1 review
+                valid_review = False
+                for comment_raw in comments_raw:
+                    if comment_raw.startswith('## PR Reviewer Guide 🔍'):
+                        valid_review = True
+                        break
+                if valid_review:
+                    break
+                else:
+                    logger.error(f"REVIEW feedback is invalid")
+                    raise Exception("REVIEW feedback is invalid")
+            else:
+                logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
+        else:
+            assert False, f"After {NUM_MINUTES} minutes, the PR did not get all the tool results"
+
+        # cleanup - delete the branch
+        pr.decline()
+        repo.branches.delete(new_branch)
+
+        # If we reach here, the test is successful
+        logger.info(f"Succeeded in running e2e test for Bitbucket app on the PR")
+    except Exception as e:
+        logger.error(f"Failed to run e2e test for Bitbucket app: {e}")
+        # delete the branch
+        pr.decline()
+        repo.branches.delete(new_branch)
+        assert False
+
+
+if __name__ == '__main__':
+    test_e2e_run_bitbucket_app()
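All three e2e tests poll with the same `for ... else` idiom seen above. A compact sketch of just that pattern (the `fetch_comments` callable is hypothetical, supplied by the caller):

```python
import time

def wait_for_feedback(fetch_comments, num_minutes: int):
    """Polling idiom shared by the e2e tests: the `else` of a for-loop
    runs only if the loop was never exited via `break`, i.e. on timeout."""
    comments = []
    for i in range(num_minutes):
        time.sleep(60)
        comments = fetch_comments()
        if len(comments) >= 2:
            break
        print(f"Waiting for the tool results... {i + 1} minute(s) passed")
    else:
        raise AssertionError(f"After {num_minutes} minutes, the tool results did not arrive")
    return comments
```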
diff --git a/tests/e2e_tests/test_github_app.py b/tests/e2e_tests/test_github_app.py
new file mode 100644
index 000000000..6ecb1da8d
--- /dev/null
+++ b/tests/e2e_tests/test_github_app.py
@@ -0,0 +1,96 @@
+import os
+import re
+import time
+from datetime import datetime
+
+from pr_agent.config_loader import get_settings
+from pr_agent.git_providers import get_git_provider
+from pr_agent.log import setup_logger, get_logger
+from tests.e2e_tests.e2e_utils import NEW_FILE_CONTENT, FILE_PATH, PR_HEADER_START_WITH, REVIEW_START_WITH, \
+    IMPROVE_START_WITH_REGEX_PATTERN, NUM_MINUTES
+
+log_level = os.environ.get("LOG_LEVEL", "INFO")
+setup_logger(log_level)
+logger = get_logger()
+
+
+def test_e2e_run_github_app():
+    """
+    What we want to do:
+    (1) open a PR in the repo 'https://github.com/Codium-ai/pr-agent-tests'
+    (2) wait for 5 minutes until the PR is processed by the GitHub app
+    (3) check that the relevant tools have been executed
+    """
+    base_branch = "main"  # or any base branch you want
+    new_branch = f"github_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
+    repo_url = 'Codium-ai/pr-agent-tests'
+    get_settings().config.git_provider = "github"
+    git_provider = get_git_provider()()
+    github_client = git_provider.github_client
+    repo = github_client.get_repo(repo_url)
+
+    try:
+        # Create a new branch from the base branch
+        source = repo.get_branch(base_branch)
+        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
+        repo.create_git_ref(ref=f"refs/heads/{new_branch}", sha=source.commit.sha)
+
+        # Get the file you want to edit
+        file = repo.get_contents(FILE_PATH, ref=base_branch)
+        # content = file.decoded_content.decode()
+
+        # Update the file content
+        logger.info(f"Updating the file {FILE_PATH}")
+        commit_message = "update cli_pip.py"
+        repo.update_file(
+            file.path,
+            commit_message,
+            NEW_FILE_CONTENT,
+            file.sha,
+            branch=new_branch
+        )
+
+        # Create a pull request
+        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
+        pr = repo.create_pull(
+            title=new_branch,
+            body="update cli_pip.py",
+            head=new_branch,
+            base=base_branch
+        )
+
+        # check every 1 minute, for 5 minutes, if the PR has all the tool results
+        for i in range(NUM_MINUTES):
+            logger.info(f"Waiting for the PR to get all the tool results...")
+            time.sleep(60)
+            logger.info(f"Checking the PR {pr.html_url} after {i + 1} minute(s)")
+            pr.update()
+            pr_header_body = pr.body
+            comments = list(pr.get_issue_comments())
+            if len(comments) == 2:
+                comments_body = [comment.body for comment in comments]
+                assert pr_header_body.startswith(PR_HEADER_START_WITH), "DESCRIBE feedback is invalid"
+                assert comments_body[0].startswith(REVIEW_START_WITH), "REVIEW feedback is invalid"
+                assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, comments_body[1]), "IMPROVE feedback is invalid"
+                break
+            else:
+                logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
+        else:
+            assert False, f"After {NUM_MINUTES} minutes, the PR did not get all the tool results"
+
+        # cleanup - delete the branch
+        logger.info(f"Deleting the branch {new_branch}")
+        repo.get_git_ref(f"heads/{new_branch}").delete()
+
+        # If we reach here, the test is successful
+        logger.info(f"Succeeded in running e2e test for GitHub app on the PR {pr.html_url}")
+    except Exception as e:
+        logger.error(f"Failed to run e2e test for GitHub app: {e}")
+        # delete the branch
+        logger.info(f"Deleting the branch {new_branch}")
+        repo.get_git_ref(f"heads/{new_branch}").delete()
+        assert False
+
+
+if __name__ == '__main__':
+    test_e2e_run_github_app()
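One caveat in the error handling above and below: the `except` blocks re-run the cleanup calls unconditionally, which will raise again if the failure happened before the branch or PR existed. A defensive variant, sketched with hypothetical callables:

```python
def run_e2e(create_branch, open_pr, verify, cleanup):
    """Only clean up resources that were actually created."""
    branch_created = False
    try:
        create_branch()
        branch_created = True
        open_pr()
        verify()
    finally:
        if branch_created:
            cleanup()  # delete the branch (and decline the PR) exactly once
```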
diff --git a/tests/e2e_tests/test_gitlab_webhook.py b/tests/e2e_tests/test_gitlab_webhook.py
new file mode 100644
index 000000000..053bb0dc2
--- /dev/null
+++ b/tests/e2e_tests/test_gitlab_webhook.py
@@ -0,0 +1,91 @@
+import os
+import re
+import time
+from datetime import datetime
+
+import gitlab
+
+from pr_agent.config_loader import get_settings
+from pr_agent.git_providers import get_git_provider
+from pr_agent.log import setup_logger, get_logger
+from tests.e2e_tests.e2e_utils import NEW_FILE_CONTENT, FILE_PATH, PR_HEADER_START_WITH, REVIEW_START_WITH, \
+    IMPROVE_START_WITH_REGEX_PATTERN, NUM_MINUTES
+
+log_level = os.environ.get("LOG_LEVEL", "INFO")
+setup_logger(log_level)
+logger = get_logger()
+
+def test_e2e_run_gitlab_webhook():
+    # GitLab setup
+    GITLAB_URL = "https://gitlab.com"
+    GITLAB_TOKEN = get_settings().gitlab.PERSONAL_ACCESS_TOKEN
+    gl = gitlab.Gitlab(GITLAB_URL, private_token=GITLAB_TOKEN)
+    repo_url = 'codiumai/pr-agent-tests'
+    project = gl.projects.get(repo_url)
+
+    base_branch = "main"  # or any base branch you want
+    new_branch = f"gitlab_webhook_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
+
+    try:
+        # Create a new branch from the base branch
+        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
+        project.branches.create({'branch': new_branch, 'ref': base_branch})
+
+        # Get the file you want to edit
+        file = project.files.get(file_path=FILE_PATH, ref=base_branch)
+        # content = file.decode()
+
+        # Update the file content
+        logger.info(f"Updating the file {FILE_PATH}")
+        commit_message = "update cli_pip.py"
+        file.content = NEW_FILE_CONTENT
+        file.save(branch=new_branch, commit_message=commit_message)
+
+        # Create a merge request
+        logger.info(f"Creating a merge request from {new_branch} to {base_branch}")
+        mr = project.mergerequests.create({
+            'source_branch': new_branch,
+            'target_branch': base_branch,
+            'title': new_branch,
+            'description': "update cli_pip.py"
+        })
+        logger.info(f"Merge request created: {mr.web_url}")
+
+        # check every 1 minute, for 5 minutes, if the MR has all the tool results
+        for i in range(NUM_MINUTES):
+            logger.info(f"Waiting for the MR to get all the tool results...")
+            time.sleep(60)
+            logger.info(f"Checking the MR {mr.web_url} after {i + 1} minute(s)")
+            mr = project.mergerequests.get(mr.iid)
+            mr_header_body = mr.description
+            comments = mr.notes.list()[::-1]
+            # clean all system comments
+            comments = [comment for comment in comments if comment.system is False]
+            if len(comments) == 2:  # "changed the description" is received as the first comment
+                comments_body = [comment.body for comment in comments]
+                if 'Work in progress' in comments_body[1]:
+                    continue
+                assert mr_header_body.startswith(PR_HEADER_START_WITH), "DESCRIBE feedback is invalid"
+                assert comments_body[0].startswith(REVIEW_START_WITH), "REVIEW feedback is invalid"
+                assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, comments_body[1]), "IMPROVE feedback is invalid"
+                break
+            else:
+                logger.info(f"Waiting for the MR to get all the tool results. {i + 1} minute(s) passed")
+        else:
+            assert False, f"After {NUM_MINUTES} minutes, the MR did not get all the tool results"
+
+        # cleanup - delete the branch
+        logger.info(f"Deleting the branch {new_branch}")
+        project.branches.delete(new_branch)
+
+        # If we reach here, the test is successful
+        logger.info(f"Succeeded in running e2e test for GitLab webhook on the MR {mr.web_url}")
+    except Exception as e:
+        logger.error(f"Failed to run e2e test for GitLab webhook: {e}")
+        logger.info(f"Deleting the branch {new_branch}")
+        project.branches.delete(new_branch)
+        assert False
+
+
+if __name__ == '__main__':
+    test_e2e_run_gitlab_webhook()