From a1582b53385963d9ddbf3e2ce66af9ef1d3a81bb Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 12 Sep 2024 09:01:36 +0300 Subject: [PATCH 1/2] enhance: cap patch extra lines and update documentation with separators and context adjustments --- docs/docs/faq/index.md | 10 ++++++++- docs/docs/tools/review.md | 3 +++ .../usage-guide/additional_configurations.md | 4 ++-- pr_agent/algo/pr_processing.py | 22 ++++++++++++++++--- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/docs/docs/faq/index.md b/docs/docs/faq/index.md index 85a2a12e9..7536492e2 100644 --- a/docs/docs/faq/index.md +++ b/docs/docs/faq/index.md @@ -20,7 +20,7 @@ Read more about this issue in our [blog](https://www.codium.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/) - +___ ??? note "Question: I received an incorrect or irrelevant suggestion. Why?" @@ -38,11 +38,15 @@ - In addition, we recommend to use the [`extra_instructions`](https://pr-agent-docs.codium.ai/tools/improve/#extra-instructions-and-best-practices) field to guide the model to suggestions that are more relevant to the specific needs of the project. - The interactive [PR chat](https://pr-agent-docs.codium.ai/chrome-extension/) also provides an easy way to get more tailored suggestions and feedback from the AI model. +___ + ??? note "Question: How can I get more tailored suggestions?" #### Answer:3 See [here](https://pr-agent-docs.codium.ai/tools/improve/#extra-instructions-and-best-practices) for more information on how to use the `extra_instructions` and `best_practices` configuration options, to guide the model to more tailored suggestions. +___ + ??? note "Question: Will you store my code ? Are you using my code to train models?" #### Answer:4 @@ -50,6 +54,8 @@ For a detailed overview of our data privacy policy, please refer to [this link](https://pr-agent-docs.codium.ai/overview/data_privacy/) +___ + ??? note "Question: Can I use my own LLM keys with PR-Agent?" #### Answer:5 @@ -57,3 +63,5 @@ PR-Agent Pro with SaaS deployment is a hosted version of PR-Agent, where Codium AI manages the infrastructure and the keys. For enterprise customers, on-prem deployment is also available. [Contact us](https://www.codium.ai/contact/#pricing) for more information. + +___ \ No newline at end of file diff --git a/docs/docs/tools/review.md b/docs/docs/tools/review.md index 67da435a3..27af7d93d 100644 --- a/docs/docs/tools/review.md +++ b/docs/docs/tools/review.md @@ -8,6 +8,9 @@ The tool can be triggered automatically every time a new PR is [opened](../usage Note that the main purpose of the `review` tool is to provide the **PR reviewer** with useful feedbacks and insights. The PR author, in contrast, may prefer to save time and focus on the output of the [improve](./improve.md) tool, which provides actionable code suggestions. +(Read more about the different personas in the PR process and how PR-Agent aims to assist them in our [blog](https://www.codium.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/)) + + ## Example usage ### Manual triggering diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md index 5eda3919e..ff7898b9d 100644 --- a/docs/docs/usage-guide/additional_configurations.md +++ b/docs/docs/usage-guide/additional_configurations.md @@ -92,8 +92,8 @@ patch_extra_lines_before=4 patch_extra_lines_after=2 ``` -Increasing this number provides more context to the model, but will also increase the token budget. -If the PR is too large (see [PR Compression strategy](https://github.com/Codium-ai/pr-agent/blob/main/PR_COMPRESSION.md)), PR-Agent automatically sets this number to 0, using the original git patch. +Increasing this number provides more context to the model, but will also increase the token budget, and may overwhelm the model with too much information, unrelated to the actual PR code changes. +If the PR is too large (see [PR Compression strategy](https://github.com/Codium-ai/pr-agent/blob/main/PR_COMPRESSION.md)), PR-Agent automatically may set this number to 0, and will use the original git patch. ## Editing the prompts diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index f4d9269db..7ca6f2268 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -23,7 +23,7 @@ OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1500 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 1000 - +MAX_EXTRA_LINES = 10 def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, @@ -38,6 +38,12 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, else: PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after + if PATCH_EXTRA_LINES_BEFORE > MAX_EXTRA_LINES: + PATCH_EXTRA_LINES_BEFORE = MAX_EXTRA_LINES + get_logger().warning(f"patch_extra_lines_before was {PATCH_EXTRA_LINES_BEFORE}, capping to {MAX_EXTRA_LINES}") + if PATCH_EXTRA_LINES_AFTER > MAX_EXTRA_LINES: + PATCH_EXTRA_LINES_AFTER = MAX_EXTRA_LINES + get_logger().warning(f"patch_extra_lines_after was {PATCH_EXTRA_LINES_AFTER}, capping to {MAX_EXTRA_LINES}") try: diff_files_original = git_provider.get_diff_files() @@ -408,11 +414,21 @@ def get_pr_multi_diffs(git_provider: GitProvider, for lang in pr_languages: sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True)) + # Get the maximum number of extra lines before and after the patch + PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before + PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after + if PATCH_EXTRA_LINES_BEFORE > MAX_EXTRA_LINES: + PATCH_EXTRA_LINES_BEFORE = MAX_EXTRA_LINES + get_logger().warning(f"patch_extra_lines_before was {PATCH_EXTRA_LINES_BEFORE}, capping to {MAX_EXTRA_LINES}") + if PATCH_EXTRA_LINES_AFTER > MAX_EXTRA_LINES: + PATCH_EXTRA_LINES_AFTER = MAX_EXTRA_LINES + get_logger().warning(f"patch_extra_lines_after was {PATCH_EXTRA_LINES_AFTER}, capping to {MAX_EXTRA_LINES}") + # try first a single run with standard diff string, with patch extension, and no deletions patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( pr_languages, token_handler, add_line_numbers_to_hunks=True, - patch_extra_lines_before=get_settings().config.patch_extra_lines_before, - patch_extra_lines_after=get_settings().config.patch_extra_lines_after) + patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, + patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER) # if we are under the limit, return the full diff if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model): From 7de6bb015041b162bcbc3847dd25ca434c116119 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 12 Sep 2024 09:05:26 +0300 Subject: [PATCH 2/2] enhance: cap patch extra lines and update documentation with separators and context adjustments --- pr_agent/algo/pr_processing.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 7ca6f2268..02c416d0f 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -26,6 +26,13 @@ MAX_EXTRA_LINES = 10 +def cap_and_log_extra_lines(value, direction) -> int: + if value > MAX_EXTRA_LINES: + get_logger().warning(f"patch_extra_lines_{direction} was {value}, capping to {MAX_EXTRA_LINES}") + return MAX_EXTRA_LINES + return value + + def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, add_line_numbers_to_hunks: bool = False, @@ -38,12 +45,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, else: PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after - if PATCH_EXTRA_LINES_BEFORE > MAX_EXTRA_LINES: - PATCH_EXTRA_LINES_BEFORE = MAX_EXTRA_LINES - get_logger().warning(f"patch_extra_lines_before was {PATCH_EXTRA_LINES_BEFORE}, capping to {MAX_EXTRA_LINES}") - if PATCH_EXTRA_LINES_AFTER > MAX_EXTRA_LINES: - PATCH_EXTRA_LINES_AFTER = MAX_EXTRA_LINES - get_logger().warning(f"patch_extra_lines_after was {PATCH_EXTRA_LINES_AFTER}, capping to {MAX_EXTRA_LINES}") + PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, "before") + PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after") try: diff_files_original = git_provider.get_diff_files() @@ -417,12 +420,8 @@ def get_pr_multi_diffs(git_provider: GitProvider, # Get the maximum number of extra lines before and after the patch PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after - if PATCH_EXTRA_LINES_BEFORE > MAX_EXTRA_LINES: - PATCH_EXTRA_LINES_BEFORE = MAX_EXTRA_LINES - get_logger().warning(f"patch_extra_lines_before was {PATCH_EXTRA_LINES_BEFORE}, capping to {MAX_EXTRA_LINES}") - if PATCH_EXTRA_LINES_AFTER > MAX_EXTRA_LINES: - PATCH_EXTRA_LINES_AFTER = MAX_EXTRA_LINES - get_logger().warning(f"patch_extra_lines_after was {PATCH_EXTRA_LINES_AFTER}, capping to {MAX_EXTRA_LINES}") + PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, "before") + PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after") # try first a single run with standard diff string, with patch extension, and no deletions patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(