core/commands/slash/edit.ts

import {
  filterCodeBlockLines,
  filterEnglishLinesAtEnd,
  filterEnglishLinesAtStart,
  fixCodeLlamaFirstLineIndentation,
  stopAtLines,
  streamWithNewLines,
} from "../../autocomplete/lineStream.js";
import { streamLines } from "../../diff/util.js";
import { ContextItemWithId, ILLM, SlashCommand } from "../../index.js";
import { stripImages } from "../../llm/images.js";
import {
  dedentAndGetCommonWhitespace,
  getMarkdownLanguageTagForFile,
} from "../../util/index.js";
import {
  contextItemToRangeInFileWithContents,
  type RangeInFileWithContents,
} from "../util.js";

/**
 * Few-shot preamble for the tag-based edit prompt.
 *
 * It states the rules (rewrite only the code between the code_to_edit tags,
 * emit nothing but code, close with </modified_code_to_edit>) and shows one
 * worked Python example. `compilePrompt` appends the real task after the
 * trailing "Main task:" line.
 *
 * The braces in the Python example are literal characters: this string is
 * concatenated as-is, so they must appear exactly as they would in valid
 * Python source (`{}` and `{key}`), not as doubled format-string escapes.
 */
const PROMPT = `Take the file prefix and suffix into account, but only rewrite the code_to_edit as specified in the user_request. The code you write in modified_code_to_edit will replace the code between the code_to_edit tags. Do NOT preface your answer or write anything other than code. The </modified_code_to_edit> tag should be written to indicate the end of the modified code section. Do not ever use nested tags.

Example:

<file_prefix>
class Database:
    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data[key]

</file_prefix>
<code_to_edit>
    def set(self, key, value):
        self._data[key] = value
</code_to_edit>
<file_suffix>

    def clear_all():
        self._data = {}
</file_suffix>
<user_request>
Raise an error if the key already exists.
</user_request>
<modified_code_to_edit>
    def set(self, key, value):
        if key in self._data:
            raise KeyError(f"Key {key} already exists")
        self._data[key] = value
</modified_code_to_edit>

Main task:
`;

/**
 * Splits a file into the prefix / selected contents / suffix that are fed to
 * the edit prompt, trimming the surrounding file context when the whole
 * request would not fit in the model's context window.
 *
 * Token budget: the full file, the static PROMPT and the user input are
 * counted, plus a fixed buffer and the tokens reserved for the completion
 * (half of the context length). If that exceeds the context length, lines are
 * dropped from the end of the file first, then from the start, never going
 * past the selected range.
 *
 * Leading and trailing blank/whitespace-only parts of the selection are moved
 * into the prefix and suffix respectively.
 *
 * NOTE: `rif.contents` is mutated in place with the trimmed selection; the
 * same string is also returned as `contents`.
 *
 * @param rif - Selected range and its text. `rif.range.start.line` and
 *   `rif.range.end.line` are used directly as slice bounds into the file's
 *   line array.
 * @param fullFileContents - Entire text of the file being edited.
 * @param model - Provides `contextLength` and `countTokens`.
 * @param input - The user's edit request (only used for token counting here).
 * @param tokenLimit - Threshold for the (currently disabled) "large range"
 *   warning; defaults to 1200 when undefined.
 * @returns The prefix, suffix, trimmed selection and the completion token
 *   budget (`maxTokens`).
 */
export async function getPromptParts(
  rif: RangeInFileWithContents,
  fullFileContents: string,
  model: ILLM,
  input: string,
  tokenLimit: number | undefined,
): Promise<{
  filePrefix: string;
  fileSuffix: string;
  contents: string;
  maxTokens: number;
}> {
  // Half of the context window is reserved for the model's output.
  const maxTokens = Math.floor(model.contextLength / 2);

  // Only referenced by the disabled guard below; kept so `tokenLimit` stays wired up.
  const TOKENS_TO_BE_CONSIDERED_LARGE_RANGE = tokenLimit ?? 1200;
  // if (model.countTokens(rif.contents) > TOKENS_TO_BE_CONSIDERED_LARGE_RANGE) {
  //   throw new Error(
  //     "\n\n**It looks like you've selected a large range to edit, which may take a while to complete. If you'd like to cancel, click the 'X' button above. If you highlight a more specific range, Continue will only edit within it.**"
  //   );
  // }

  const BUFFER_FOR_FUNCTIONS = 400;
  let totalTokens =
    model.countTokens(fullFileContents + PROMPT + input) +
    BUFFER_FOR_FUNCTIONS +
    maxTokens;

  const fullFileContentsList = fullFileContents.split("\n");
  const maxStartLine = rif.range.start.line;
  const minEndLine = rif.range.end.line;
  // Inclusive bounds of the file lines that are still part of the context.
  let curStartLine = 0;
  let curEndLine = fullFileContentsList.length - 1;

  // Drop lines from the bottom of the file first.
  if (totalTokens > model.contextLength) {
    while (curEndLine > minEndLine) {
      totalTokens -= model.countTokens(fullFileContentsList[curEndLine]);
      curEndLine--;
      if (totalTokens < model.contextLength) {
        break;
      }
    }
  }

  // Still too large: drop lines from the top of the file.
  if (totalTokens > model.contextLength) {
    while (curStartLine < maxStartLine) {
      // Subtract the tokens of the line being dropped *before* advancing, so
      // the accounting matches the line that actually leaves the prefix.
      totalTokens -= model.countTokens(fullFileContentsList[curStartLine]);
      curStartLine++;
      if (totalTokens < model.contextLength) {
        break;
      }
    }
  }

  let filePrefix = fullFileContentsList
    .slice(curStartLine, maxStartLine)
    .join("\n");
  // `curEndLine` is the index of the last kept line and `slice` excludes its
  // end index, hence `+ 1`.
  let fileSuffix = fullFileContentsList
    .slice(minEndLine, curEndLine + 1)
    .join("\n");

  if (rif.contents.length > 0) {
    // Move a whitespace-only first line of the selection into the prefix.
    let lines = rif.contents.split(/\r?\n/);
    let firstLine = lines[0] || null;
    while (firstLine && firstLine.trim() === "") {
      filePrefix += firstLine;
      rif.contents = rif.contents.substring(firstLine.length);
      lines = rif.contents.split(/\r?\n/);
      firstLine = lines[0] || null;
    }

    // Move a whitespace-only last line of the selection into the suffix.
    let lastLine = lines[lines.length - 1] || null;
    while (lastLine && lastLine.trim() === "") {
      fileSuffix = lastLine + fileSuffix;
      rif.contents = rif.contents.substring(
        0,
        rif.contents.length - lastLine.length,
      );
      lines = rif.contents.split(/\r?\n/);
      lastLine = lines[lines.length - 1] || null;
    }

    // Move bare leading / trailing newlines out of the selection as well.
    while (rif.contents.startsWith("\n")) {
      filePrefix += "\n";
      rif.contents = rif.contents.substring(1);
    }
    while (rif.contents.endsWith("\n")) {
      fileSuffix = `\n${fileSuffix}`;
      rif.contents = rif.contents.substring(0, rif.contents.length - 1);
    }
  }
  return { filePrefix, fileSuffix, contents: rif.contents, maxTokens };
}

/**
 * Assembles the text prompt sent to the model.
 *
 * When there is nothing selected (blank `contents`) a dedicated
 * "insert at cursor" prompt is produced. Otherwise the few-shot PROMPT is
 * followed by the tagged sections; the prefix and suffix sections are left
 * out when they are blank, and the prompt ends with an opening
 * <modified_code_to_edit> tag for the model to continue from.
 */
function compilePrompt(
  filePrefix: string,
  contents: string,
  fileSuffix: string,
  input: string,
): string {
  const nothingSelected = contents.trim() === "";
  if (nothingSelected) {
    // Separate prompt for insertion at the cursor, the other tends to cause it to repeat whole file
    return [
      "<file_prefix>",
      filePrefix,
      "</file_prefix>",
      "<insertion_code_here>",
      "<file_suffix>",
      fileSuffix,
      "</file_suffix>",
      "<user_request>",
      input,
      "</user_request>",
      "",
      "Please output the code to be inserted at the cursor in order to fulfill the user_request. Do NOT preface your answer or write anything other than code. You should not write any tags, just the code. Make sure to correctly indent the code:",
    ].join("\n");
  }

  // Each section carries its own leading newline, so they concatenate directly.
  const sections: string[] = [PROMPT];

  const hasPrefix = filePrefix.trim() !== "";
  if (hasPrefix) {
    sections.push(["", "<file_prefix>", filePrefix, "</file_prefix>"].join("\n"));
  }

  sections.push(["", "<code_to_edit>", contents, "</code_to_edit>"].join("\n"));

  const hasSuffix = fileSuffix.trim() !== "";
  if (hasSuffix) {
    sections.push(["", "<file_suffix>", fileSuffix, "</file_suffix>"].join("\n"));
  }

  sections.push(
    [
      "",
      "<user_request>",
      input,
      "</user_request>",
      "<modified_code_to_edit>",
      "",
    ].join("\n"),
  );

  return sections.join("");
}

/**
 * Reports whether a generated line contains one of the markers that signal
 * the end of the model's code output.
 */
function isEndLine(line: string) {
  const endMarkers = ["</modified_code_to_edit>", "</code_to_edit>", "[/CODE]"];
  return endMarkers.some((marker) => line.includes(marker));
}

/**
 * Reports whether a generated line is prompt scaffolding (a markdown fence or
 * one of the prompt's tags) rather than code, and should therefore be dropped.
 *
 * `isFirstLine` is accepted but does not influence the result.
 */
function lineToBeIgnored(line: string, isFirstLine = false): boolean {
  const scaffolding = [
    "```",
    "<modified_code_to_edit>",
    "<file_prefix>",
    "</file_prefix>",
    "<file_suffix>",
    "</file_suffix>",
    "<user_request>",
    "</user_request>",
    "<code_to_edit>",
  ];
  for (const marker of scaffolding) {
    if (line.indexOf(marker) !== -1) {
      return true;
    }
  }
  return false;
}

/**
 * `/edit` slash command.
 *
 * Flow of `run`:
 *  1. Pick the code context item to edit (preferring one flagged `editing`).
 *  2. Derive the user's request from the last history message / raw input.
 *  3. Save and re-read the file, then build the prompt parts and the prompt.
 *  4. Stream the model output (via the model's own edit template when it has
 *     one, otherwise via chat), filter out scaffolding lines, and push an
 *     updated diff to the IDE after every chunk.
 *  5. Optionally (params.recap) stream a short explanation of the change.
 */
const EditSlashCommand: SlashCommand = {
  name: "edit",
  description: "Edit selected code",
  run: async function* ({ ide, llm, input, history, contextItems, params }) {
    // Prefer the item explicitly marked as being edited, else any code item.
    let contextItemToEdit = contextItems.find(
      (item: ContextItemWithId) =>
        item.editing && item.id.providerTitle === "code",
    );
    if (!contextItemToEdit) {
      contextItemToEdit = contextItems.find(
        (item: ContextItemWithId) => item.id.providerTitle === "code",
      );
    }

    if (!contextItemToEdit) {
      yield "Please highlight the code you want to edit, then press `cmd/ctrl+shift+L` to add it to chat";
      return;
    }

    // Strip unnecessary parts of the input (the fact that you have to do this is suboptimal, should be refactored away)
    let content = history[history.length - 1].content;
    if (typeof content !== "string") {
      content.forEach((part) => {
        if (part.text?.startsWith("/edit")) {
          part.text = part.text.replace("/edit", "").trimStart();
        }
      });
    } else if (input?.startsWith("/edit")) {
      content = input.replace("/edit", "").trimStart();
    } else if (input?.startsWith("/comment")) {
      content = input.replace("/comment", "").trimStart();
    }
    // Remove the fenced copy of the selected code from the message text.
    let userInput = stripImages(content).replace(
      `\`\`\`${contextItemToEdit.name}\n${contextItemToEdit.content}\n\`\`\`\n`,
      "",
    );
    // If the above replace fails to find a match, the code will still be present
    // in the userInput. Replace it with input, but only when input is actually
    // available (non-empty); otherwise keep what we have rather than blanking
    // the request.
    if (userInput.includes("```") && input) {
      userInput = input;
    }

    const rif: RangeInFileWithContents =
      contextItemToRangeInFileWithContents(contextItemToEdit);

    // Make sure what we read from disk matches the editor buffer.
    await ide.saveFile(rif.filepath);
    const fullFileContents = await ide.readFile(rif.filepath);

    // NOTE: getPromptParts also trims `rif.contents` in place; later reads of
    // `rif.contents` see the trimmed selection.
    let { filePrefix, contents, fileSuffix, maxTokens } = await getPromptParts(
      rif,
      fullFileContents,
      llm,
      userInput,
      params?.tokenLimit,
    );
    // The prompt gets the dedented selection; the removed indentation is
    // re-applied to every generated line further down.
    const [dedentedContents, commonWhitespace] =
      dedentAndGetCommonWhitespace(contents);
    contents = dedentedContents;

    const prompt = compilePrompt(filePrefix, contents, fileSuffix, userInput);
    const fullFileContentsLines = fullFileContents.split("\n");
    // Untrimmed file text around the selection, used to rebuild the whole file
    // for the diff view.
    const fullPrefixLines = fullFileContentsLines.slice(
      0,
      Math.max(0, rif.range.start.line - 1),
    );
    const fullSuffixLines = fullFileContentsLines.slice(rif.range.end.line);

    // Original lines that are still shown below the streamed completion.
    let linesToDisplay: string[] = [];

    /**
     * Rebuilds the full file text (prefix + completion so far + not yet
     * rewritten original lines + suffix) and asks the IDE to show it as a diff.
     * With `final` set, no original lines are appended.
     */
    async function sendDiffUpdate(lines: string[], final = false) {
      const completion = lines.join("\n");

      // Don't do this at the very end, just show the inserted code
      if (final) {
        linesToDisplay = [];
      }

      // Only recalculate at every new-line, because this is sort of expensive
      else if (completion.endsWith("\n")) {
        const contentsLines = rif.contents.split("\n");
        // Index just past the last original line that a generated line matched.
        let rewrittenLines = 0;
        for (const line of lines) {
          for (let i = rewrittenLines; i < contentsLines.length; i++) {
            if (
              //   difflib.SequenceMatcher(
              //     null, line, contentsLines[i]
              //   ).ratio()
              //   > 0.7
              line.trim() === contentsLines[i].trim() && // Temp replacement for difflib (TODO)
              contentsLines[i].trim() !== ""
            ) {
              rewrittenLines = i + 1;
              break;
            }
          }
        }
        linesToDisplay = contentsLines.slice(rewrittenLines);
      }

      const newFileContents = `${fullPrefixLines.join("\n")}\n${completion}\n${
        linesToDisplay.length > 0 ? `${linesToDisplay.join("\n")}\n` : ""
      }${fullSuffixLines.join("\n")}`;

      const stepIndex = history.length - 1;

      await ide.showDiff(rif.filepath, newFileContents, stepIndex);
    }

    // Important state variables
    // -------------------------
    const originalLines = rif.contents === "" ? [] : rif.contents.split("\n");
    // In the actual file, taking into account block offset
    let currentLineInFile = rif.range.start.line;
    let currentBlockLines: string[] = [];
    let originalLinesBelowPreviousBlocks = originalLines;
    // The start of the current block in file, taking into account block offset
    let currentBlockStart = -1;
    let offsetFromBlocks = 0;

    // Don't end the block until you've matched N simultaneous lines
    // This helps avoid many tiny blocks
    const LINES_TO_MATCH_BEFORE_ENDING_BLOCK = 2;
    // If a line has been matched at the end of the block, this is its index within originalLinesBelowPreviousBlocks
    // Except we are keeping track of multiple potentialities, so it's a list
    // We always check the lines following each of these leads, but if multiple make it out at the end, we use the first one
    // This is a tuple of (index_of_last_matched_line, number_of_lines_matched)
    let indicesOfLastMatchedLines: [number, number][] = [];

    /**
     * Block-tracking state machine: groups generated lines into "blocks" of
     * changed lines, closing a block once enough consecutive generated lines
     * match the original again.
     *
     * NOTE(review): within this command it is only invoked for the trailing
     * unfinished line after streaming ends, not for each streamed line.
     */
    async function handleGeneratedLine(line: string) {
      if (currentBlockLines.length === 0) {
        // Set this as the start of the next block
        currentBlockStart =
          rif.range.start.line +
          originalLines.length -
          originalLinesBelowPreviousBlocks.length +
          offsetFromBlocks;
        if (
          originalLinesBelowPreviousBlocks.length > 0 &&
          line === originalLinesBelowPreviousBlocks[0]
        ) {
          // Line is equal to the next line in file, move past this line
          originalLinesBelowPreviousBlocks =
            originalLinesBelowPreviousBlocks.slice(1);
          return;
        }
      }

      // In a block, and have already matched at least one line
      // Check if the next line matches, for each of the candidates
      const matchesFound: [number, number][] = [];
      let firstValidMatch: [number, number] | null = null;
      for (const [
        index_of_last_matched_line,
        num_lines_matched,
      ] of indicesOfLastMatchedLines) {
        if (
          index_of_last_matched_line + 1 <
            originalLinesBelowPreviousBlocks.length &&
          line ===
            originalLinesBelowPreviousBlocks[index_of_last_matched_line + 1]
        ) {
          matchesFound.push([
            index_of_last_matched_line + 1,
            num_lines_matched + 1,
          ]);
          if (
            firstValidMatch === null &&
            num_lines_matched + 1 >= LINES_TO_MATCH_BEFORE_ENDING_BLOCK
          ) {
            firstValidMatch = [
              index_of_last_matched_line + 1,
              num_lines_matched + 1,
            ];
          }
        }
      }
      indicesOfLastMatchedLines = matchesFound;

      if (firstValidMatch !== null) {
        // We've matched the required number of lines, insert suggestion!

        // We added some lines to the block that were matched (including maybe some blank lines)
        // So here we will strip all matching lines from the end of currentBlockLines
        const linesStripped: string[] = [];
        let indexOfLastLineInBlock: number = firstValidMatch[0];
        while (
          currentBlockLines.length > 0 &&
          currentBlockLines[currentBlockLines.length - 1] ===
            originalLinesBelowPreviousBlocks[indexOfLastLineInBlock - 1]
        ) {
          linesStripped.push(currentBlockLines.pop() as string);
          indexOfLastLineInBlock -= 1;
        }

        // Reset current block / update variables
        currentLineInFile += 1;
        offsetFromBlocks += currentBlockLines.length;
        originalLinesBelowPreviousBlocks =
          originalLinesBelowPreviousBlocks.slice(indexOfLastLineInBlock + 1);
        currentBlockLines = [];
        currentBlockStart = -1;
        indicesOfLastMatchedLines = [];

        return;
      }

      // Always look for new matching candidates
      const newMatches: [number, number][] = [];
      for (let i = 0; i < originalLinesBelowPreviousBlocks.length; i++) {
        const ogLine = originalLinesBelowPreviousBlocks[i];
        // TODO: It's a bit sus to be disqualifying empty lines.
        // What you ideally do is find ALL matches, and then throw them out as you check the following lines
        if (ogLine === line) {
          // and og_line.trim() !== "":
          newMatches.push([i, 1]);
        }
      }
      indicesOfLastMatchedLines = indicesOfLastMatchedLines.concat(newMatches);

      // Make sure they are sorted by index
      indicesOfLastMatchedLines = indicesOfLastMatchedLines.sort(
        (a, b) => a[0] - b[0],
      );

      currentBlockLines.push(line);
    }

    // `messages` aliases `history`, so this replaces the last history entry
    // with the compiled prompt.
    let messages = history;
    messages[messages.length - 1] = { role: "user", content: prompt };

    let linesOfPrefixCopied = 0;
    // Accepted lines of the completion so far.
    const lines: string[] = [];
    // Text received after the last newline, i.e. a line still being streamed.
    let unfinishedLine = "";
    let completionLinesCovered = 0;
    let repeatingFileSuffix = false;
    const lineBelowHighlightedRange = fileSuffix.trim().split("\n")[0];
    // Loop-invariant: filePrefix is not reassigned after this point.
    const filePrefixLineCount = filePrefix.split("\n").length;

    // Use custom templates defined by the model
    const template = llm.promptTemplates?.edit;
    let generator: AsyncGenerator<string>;
    if (template) {
      const rendered = llm.renderPromptTemplate(
        template,
        // typeof template === 'string' ? template : template.prompt,
        messages.slice(0, messages.length - 1),
        {
          codeToEdit: rif.contents,
          userInput,
          filePrefix: filePrefix,
          fileSuffix: fileSuffix,

          // Some built-in templates use these instead of the above
          prefix: filePrefix,
          suffix: fileSuffix,

          language: getMarkdownLanguageTagForFile(rif.filepath),
          systemMessage: llm.systemMessage ?? "",
          // "contextItems": (await sdk.getContextItemChatMessages()).map(x => x.content || "").join("\n\n"),
        },
      );
      if (typeof rendered === "string") {
        messages = [
          {
            role: "user",
            content: rendered,
          },
        ];
      } else {
        messages = rendered;
      }

      // NOTE(review): `rendered` is passed to streamComplete even when the
      // template produced a message list (the cast hides that); confirm
      // whether that case should go through streamChat instead.
      const completion = llm.streamComplete(rendered as string, {
        maxTokens: Math.min(maxTokens, Math.floor(llm.contextLength / 2), 4096),
        raw: true,
      });
      let lineStream = streamLines(completion);

      lineStream = filterEnglishLinesAtStart(lineStream);

      lineStream = filterEnglishLinesAtEnd(filterCodeBlockLines(lineStream));
      lineStream = stopAtLines(lineStream, () => {});

      generator = streamWithNewLines(
        fixCodeLlamaFirstLineIndentation(lineStream),
      );
    } else {
      // No model-specific template: stream the chat response as plain text.
      async function* gen() {
        for await (const chunk of llm.streamChat(messages, {
          temperature: 0.5, // TODO
          maxTokens: Math.min(
            maxTokens,
            Math.floor(llm.contextLength / 2),
            4096,
          ),
        })) {
          yield stripImages(chunk.content);
        }
      }

      generator = gen();
    }

    for await (const chunk of generator) {
      // Stop early if it is repeating the fileSuffix or the step was deleted
      if (repeatingFileSuffix) {
        break;
      }

      // Allow stopping breakpoints
      yield undefined;

      // Accumulate lines
      const chunkLines = chunk.split("\n");
      chunkLines[0] = unfinishedLine + chunkLines[0];
      if (chunk.endsWith("\n")) {
        unfinishedLine = "";
        chunkLines.pop(); // because this will be an empty string
      } else {
        unfinishedLine = chunkLines.pop() ?? "";
      }

      // Deal with newly accumulated lines
      for (let i = 0; i < chunkLines.length; i++) {
        // Trailing whitespace doesn't matter
        chunkLines[i] = chunkLines[i].trimEnd();
        // Restore the indentation that was stripped before prompting.
        chunkLines[i] = commonWhitespace + chunkLines[i];

        // Lines that should signify the end of generation
        // NOTE(review): this only leaves the per-chunk loop; later chunks are
        // still processed.
        if (isEndLine(chunkLines[i])) {
          break;
        }
        // Lines that should be ignored, like the <> tags
        if (lineToBeIgnored(chunkLines[i], completionLinesCovered === 0)) {
          continue; // noice
        }
        // Check if we are currently just copying the prefix
        if (
          (linesOfPrefixCopied > 0 || completionLinesCovered === 0) &&
          linesOfPrefixCopied < filePrefixLineCount &&
          chunkLines[i] === fullPrefixLines[linesOfPrefixCopied]
        ) {
          // This is a sketchy way of stopping it from repeating the filePrefix. Is a bug if output happens to have a matching line
          linesOfPrefixCopied += 1;
          continue; // also nice
        }
        // Because really short lines might be expected to be repeated, this is only a !heuristic!
        // Stop when it starts copying the fileSuffix
        if (
          chunkLines[i].trim() === lineBelowHighlightedRange.trim() &&
          chunkLines[i].trim().length > 4 &&
          !(
            originalLinesBelowPreviousBlocks.length > 0 &&
            chunkLines[i].trim() === originalLinesBelowPreviousBlocks[0].trim()
          )
        ) {
          repeatingFileSuffix = true;
          break;
        }

        lines.push(chunkLines[i]);
        completionLinesCovered += 1;
        currentLineInFile += 1;
      }

      // Show progress including the partial line, unless it looks like the
      // start of a tag.
      await sendDiffUpdate(
        lines.concat([
          unfinishedLine?.startsWith("<")
            ? commonWhitespace
            : commonWhitespace + unfinishedLine,
        ]),
      );
    }

    // Add the unfinished line
    if (
      unfinishedLine !== "" &&
      !lineToBeIgnored(unfinishedLine, completionLinesCovered === 0) &&
      !isEndLine(unfinishedLine)
    ) {
      unfinishedLine = commonWhitespace + unfinishedLine;
      lines.push(unfinishedLine);
      await handleGeneratedLine(unfinishedLine);
      completionLinesCovered += 1;
      currentLineInFile += 1;
    }

    await sendDiffUpdate(lines, true);

    if (params?.recap) {
      // Ask the model for a short summary of what changed and stream it out.
      const prompt = `This is the code before editing:
\`\`\`
${contents}
\`\`\`

This is the code after editing:

\`\`\`
${lines.join("\n")}
\`\`\`

Please briefly explain the changes made to the code above. Give no more than 2-3 sentences, and use markdown bullet points:`;

      for await (const update of llm.streamComplete(prompt)) {
        yield update;
      }
    }
  },
};

export default EditSlashCommand;