Skip to content

Commit

Permalink
patch_extra_lines_before and patch_extra_lines_after
Browse files Browse the repository at this point in the history
  • Loading branch information
mrT23 committed Aug 10, 2024
1 parent 1a8b143 commit 61bdfd3
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 36 deletions.
3 changes: 2 additions & 1 deletion docs/docs/usage-guide/additional_configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ By default, around any change in your PR, git patch provides three lines of cont
For the `review`, `describe`, `ask` and `add_docs` tools, if the token budget allows, PR-Agent tries to increase the number of lines of context, via the parameter:
```
[config]
patch_extra_lines=3
patch_extra_lines_before=6
patch_extra_lines_after=2
```

Increasing this number provides more context to the model, but will also increase the token budget.
Expand Down
27 changes: 8 additions & 19 deletions pr_agent/algo/git_patch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,8 @@
from pr_agent.log import get_logger


def extend_patch(original_file_str, patch_str, num_lines) -> str:
"""
Extends the given patch to include a specified number of surrounding lines.
Args:
original_file_str (str): The original file to which the patch will be applied.
patch_str (str): The patch to be applied to the original file.
num_lines (int): The number of surrounding lines to include in the extended patch.
Returns:
str: The extended patch string.
"""
if not patch_str or num_lines == 0:
def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch_extra_lines_after=0) -> str:
if not patch_str or (patch_extra_lines_before == 0 and patch_extra_lines_after == 0):
return patch_str

if type(original_file_str) == bytes:
Expand All @@ -43,7 +32,7 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str:
# finish previous hunk
if start1 != -1:
extended_patch_lines.extend(
original_lines[start1 + size1 - 1:start1 + size1 - 1 + num_lines])
original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after])

res = list(match.groups())
for i in range(len(res)):
Expand All @@ -55,10 +44,10 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str:
start1, size1, size2 = map(int, res[:3])
start2 = 0
section_header = res[4]
extended_start1 = max(1, start1 - num_lines)
extended_size1 = size1 + (start1 - extended_start1) + num_lines
extended_start2 = max(1, start2 - num_lines)
extended_size2 = size2 + (start2 - extended_start2) + num_lines
extended_start1 = max(1, start1 - patch_extra_lines_before)
extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after
extended_start2 = max(1, start2 - patch_extra_lines_before)
extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after
extended_patch_lines.append(
f'@@ -{extended_start1},{extended_size1} '
f'+{extended_start2},{extended_size2} @@ {section_header}')
Expand All @@ -74,7 +63,7 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str:
# finish previous hunk
if start1 != -1:
extended_patch_lines.extend(
original_lines[start1 + size1 - 1:start1 + size1 - 1 + num_lines])
original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after])

extended_patch_str = '\n'.join(extended_patch_lines)
return extended_patch_str
Expand Down
25 changes: 10 additions & 15 deletions pr_agent/algo/pr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
large_pr_handling=False,
return_remaining_files=False):
if disable_extra_lines:
PATCH_EXTRA_LINES = 0
PATCH_EXTRA_LINES_BEFORE = 0
PATCH_EXTRA_LINES_AFTER = 0
else:
PATCH_EXTRA_LINES = get_settings().config.patch_extra_lines
PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before
PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after

try:
diff_files_original = git_provider.get_diff_files()
Expand Down Expand Up @@ -64,7 +66,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,

# generate a standard diff string, with patch extension
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES)
pr_languages, token_handler, add_line_numbers_to_hunks,
patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER)

# if we are under the limit, return the full diff
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
Expand Down Expand Up @@ -174,17 +177,8 @@ def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenH
def pr_generate_extended_diff(pr_languages: list,
token_handler: TokenHandler,
add_line_numbers_to_hunks: bool,
patch_extra_lines: int = 0) -> Tuple[list, int, list]:
"""
Generate a standard diff string with patch extension, while counting the number of tokens used and applying diff
minimization techniques if needed.
Args:
- pr_languages: A list of dictionaries representing the languages used in the pull request and their corresponding
files.
- token_handler: An object of the TokenHandler class used for handling tokens in the context of the pull request.
- add_line_numbers_to_hunks: A boolean indicating whether to add line numbers to the hunks in the diff.
"""
patch_extra_lines_before: int = 0,
patch_extra_lines_after: int = 0) -> Tuple[list, int, list]:
total_tokens = token_handler.prompt_tokens # initial tokens
patches_extended = []
patches_extended_tokens = []
Expand All @@ -196,7 +190,8 @@ def pr_generate_extended_diff(pr_languages: list,
continue

# extend each patch with extra lines of context
extended_patch = extend_patch(original_file_content_str, patch, num_lines=patch_extra_lines)
extended_patch = extend_patch(original_file_content_str, patch,
patch_extra_lines_before, patch_extra_lines_after)
if not extended_patch:
get_logger().warning(f"Failed to extend patch for file: {file.filename}")
continue
Expand Down
3 changes: 2 additions & 1 deletion pr_agent/settings/configuration.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ max_commits_tokens = 500
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
custom_model_max_tokens=-1 # for models not in the default list
#
patch_extra_lines = 1
patch_extra_lines_before = 6
patch_extra_lines_after = 2
secret_provider=""
cli_mode=false
ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs
Expand Down

0 comments on commit 61bdfd3

Please sign in to comment.