diff --git a/_sources/notebooks/2_inference.ipynb b/_sources/notebooks/2_inference.ipynb index 3d124de..064f71a 100644 --- a/_sources/notebooks/2_inference.ipynb +++ b/_sources/notebooks/2_inference.ipynb @@ -15,12 +15,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "initial_id", "metadata": { "collapsed": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from pathlib import Path\n", "\n", @@ -49,12 +58,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "ad4d16d13804be69", "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]\n", + "Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]\n", + "Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]\n", + "Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]\n", + "Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]\n" + ] + } + ], "source": [ "# download tokenizer from huggingface\n", "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/codereviewer\")\n", @@ -65,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "cb003d6d8f578da1", "metadata": { "collapsed": false @@ -99,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "d06f51b2150c61c4", "metadata": { "collapsed": false @@ -146,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "7a5b97449733bbc6", "metadata": { "collapsed": false @@ -194,12 +215,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c508661efcdcad40", "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 478kB/s]\n", + "Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:02<00:00, 306MB/s] \n", + "Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 36.7kB/s]\n", + "100%|██████████| 636/636 [11:08<00:00, 1.05s/it]\n", + "100%|██████████| 63/63 [03:31<00:00, 3.35s/it]\n", + "100%|██████████| 63/63 [01:57<00:00, 1.87s/it]\n", + "100%|██████████| 63/63 [02:39<00:00, 2.53s/it]\n" + ] + } + ], "source": [ "# download the pretrained model from huggingface\n", "hf_model = AutoModelForSeq2SeqLM.from_pretrained(\"microsoft/codereviewer\")\n", @@ -235,12 +270,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "851255e54c49484a", "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]\n", + "Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] \n", + "Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when 
initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']\n", + "- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]\n", + "100%|██████████| 636/636 [15:58<00:00, 1.51s/it]\n", + "100%|██████████| 63/63 [01:41<00:00, 1.61s/it]\n", + "100%|██████████| 63/63 [01:32<00:00, 1.47s/it]\n", + "100%|██████████| 63/63 [01:27<00:00, 1.39s/it]\n" + ] + } + ], "source": [ "# download the fine-tuned model\n", "ft_model = AutoModelForSeq2SeqLM.from_pretrained(\"waleko/codereviewer-finetuned-msg\")\n", diff --git a/_sources/notebooks/3_evaluation.ipynb b/_sources/notebooks/3_evaluation.ipynb index 6a8181f..42de24c 100644 --- a/_sources/notebooks/3_evaluation.ipynb +++ b/_sources/notebooks/3_evaluation.ipynb @@ -2,22 +2,31 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "# Predictions Evaluation" - ], + "id": "bc60e7255799cccf", "metadata": { "collapsed": false }, - "id": "bc60e7255799cccf" + "source": [ + "# Predictions Evaluation" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "initial_id", "metadata": { "collapsed": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from pathlib import Path\n", "import numpy as np\n", @@ -28,7 +37,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, + "id": "76e1f5dc15fd5625", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "def analyze_preds(base_file, sample_size=5):\n", @@ -48,15 +61,15 @@ " print(f'HF Pred: {hf_pred}')\n", " print(f'Fine Tuned Pred: {fine_tuned_pred}')\n", " return df" - ], - "metadata": { - "collapsed": false - }, - "id": "76e1f5dc15fd5625" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "1f5e6defa2df6726", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "def calc_bleu(df):\n", @@ -83,163 +96,539 @@ " print(f'HF BLEU: {hf_bleu}')\n", " print(f'Fine Tuned BLEU: {ft_bleu}')\n", " return hf_bleu, ft_bleu" - ], - "metadata": { - "collapsed": false - }, - "id": "1f5e6defa2df6726" + ] }, { "cell_type": "markdown", + "id": "3698532ffeff76b4", + "metadata": { + "collapsed": false + }, "source": [ "## Qualitative Evaluation\n", "We will now compare the predictions of the HF model and the fine-tuned model on samples of the four datasets.\n", "\n", "We will print the code, the prediction of the HF model and the prediction of the fine-tuned model." 
- ], - "metadata": { - "collapsed": false - }, - "id": "3698532ffeff76b4" + ] }, { "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "df = {}" - ], + "execution_count": 4, + "id": "642eedcac45ee834", "metadata": { "collapsed": false }, - "id": "642eedcac45ee834" + "outputs": [], + "source": [ + "df = {}" + ] }, { "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "df['msg'] = analyze_preds('../data/msg-test')" - ], + "execution_count": 5, + "id": "30dc56606146a2d6", "metadata": { "collapsed": false }, - "id": "30dc56606146a2d6" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------\n", + "@@ -20,10 +20,6 @@ from pylint import checkers, interfaces\n", + " from pylint.checkers import utils\n", + "\n", + "\n", + "-def _is_constant_empty_str(node):\n", + "- return isinstance(node, nodes.Const) and node.value == \"\"\n", + "-\n", + "-\n", + " class CompareToEmptyStringChecker(checkers.BaseChecker):\n", + " \"\"\"Checks for comparisons to empty string.\n", + " Most of the times you should use the fact that empty strings are false.\n", + "HF Pred: Why is this removed?\n", + "Fine Tuned Pred: Removing this helper function because it isn't used anywhere else and `_is_constant_empty_str` is used in other places.\n", + "-------------------\n", + "@@ -14,7 +14,7 @@ import net.sourceforge.pmd.RuleViolation;\n", + " /**\n", + " * A {@link RuleViolation} implementation that is immutable, and therefore cache friendly\n", + " */\n", + "-public final class CachedRuleViolation implements RuleViolation {\n", + "+public class CachedRuleViolation implements RuleViolation {\n", + "\n", + " private final CachedRuleMapper mapper;\n", + " \n", + "HF Pred: Why did you remove `final`\n", + "Fine Tuned Pred: Why did you remove the final modif\n", + "-------------------\n", + "@@ -225,6 +225,18 @@ class BokehPlot(DimensionedPlot):\n", + " source.data.update(converted_data)\n", + " else:\n", + " source.stream(data, stream.length)\n", + "+ return\n", + "+\n", + "+ # Determine if the CDS.data requires a full update or simply needs\n", + "+ # to be updated, this is done by determining whether newly added\n", + "+ # or not updated columns have the same length\n", + "+ new_length = [len(v) for v in data.values() if isinstance(v, (list, np.ndarray))]\n", + "+ length = [len(v) for v in source.data.values() if isinstance(v, (list, np.ndarray))]\n", + "+ not_updated = [k for k in source.data if k not in data]\n", + "+ new = [k for k in data if k not in source.data]\n", + "+ if ((not_updated and new_length and any(len(source.data[n]) != new_length[0] for n in not_updated))\n", + "+ or (new and length and any(len(data[n]) != length[0] for n in new))):\n", + "+ source.data = data\n", + " else:\n", + " source.data.update(data)\n", + " \n", + "HF Pred: I don't think this is the right way to do this. 
If the length o\n", + "Fine Tuned Pred: @Eric89GXL @mluessi do we need to check \n", + "-------------------\n", + "@@ -128,7 +128,7 @@ public class RestRequest {\n", + " \t *\n", + " \t * @param method\t\t\t\tthe HTTP method for the request (GET/POST/DELETE etc)\n", + " \t * @param path\t\t\t\t\tthe URI path, this will automatically be resolved against the users current instance host.\n", + "-\t * @param httpEntity\t\t\tthe request body if there is one, can be null.\n", + "+\t * @param requestEntity\t\t\tthe request body if there is one, can be null.\n", + " \t */\n", + " \tpublic RestRequest(RestMethod method, String path, HttpEntity requestEntity) {\n", + " \t\tthis(method, path, requestEntity, null);\n", + "HF Pred: Why did you change this?\n", + "Fine Tuned Pred: This change should be\n", + "-------------------\n", + "@@ -37,6 +37,12 @@ module Selenium\n", + " def bridge_class\n", + " Bridge\n", + " end\n", + "+\n", + "+ def print_page(**options)\n", + "+ options[:page_ranges] &&= Array(options[:page_ranges])\n", + "+\n", + "+ bridge.print_page(options)\n", + "+ end\n", + " end # Driver\n", + " end # Firefox\n", + " end # WebDriver\n", + "HF Pred: Extra empty line de\n", + "Fine Tuned Pred: Use 2 (not 0) spaces for indenting an expre\n" + ] + } + ], + "source": [ + "df['msg'] = analyze_preds('../data/msg-test')" + ] }, { "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "df['vscode'] = analyze_preds('../data/microsoft_vscode_1000.csv')" - ], + "execution_count": 6, + "id": "19fbf302695a4c36", "metadata": { "collapsed": false }, - "id": "19fbf302695a4c36" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------\n", + "@@ -1127,4 +1134,12 @@ registry.add('eslint-stylish', {\n", + " \tapplyTo: ApplyToKind.allDocuments,\n", + " \tfileLocation: FileLocationKind.Absolute,\n", + " \tpattern: defaultPattern('eslint-stylish')\n", + "-});\n", + "\\ No newline at end of file\n", + "+});\n", + "+\n", + "+registry.add('go', {\n", + "+\towner: 'typescript',\n", + "+\tapplyTo: ApplyToKind.allDocuments,\n", + "+\tfileLocation: FileLocationKind.Relative,\n", + "+\tfilePrefix: '${cwd}',\n", + "HF Pred: Why is this needed?\n", + "Fine Tuned Pred: I am not sure if this is a good idea. It makes me wonder if we shoul\n", + "-------------------\n", + "@@ -0,0 +1,46 @@\n", + "+\n", + "+\n", + "+\t@@NAME@@.desktop\n", + "+\tCC-BY-SA-3.0\n", + "+\tMIT\n", + "+\t@@NAME_LONG@@\n", + "+\thttps://code.visualstudio.com\n", + "+\tCode editor for developers supporting integration with existing tools\n", + "+\t\n", + "+\t\t

\n", + "+\t\t\tVisual Studio Code is a lightweight but powerful source code editor which\n", + "HF Pred: I think this should be `@@NA\n", + "Fine Tuned Pred: We don't need this file, it's only \n", + "-------------------\n", + "@@ -171,7 +171,7 @@ export function distinct(array: T[], keyFn?: (t: T) => string): T[] {\n", + " }\n", + " \n", + " export function first(array: T[], fn: (item: T) => boolean, notFoundValue: T = null): T {\n", + "-\tfor (let i = 0; i < array.length; i++) {\n", + "+\tfor (let i = 0, I = array.length; i < I; i++) {\n", + " \t\tconst element = array[i];\n", + "HF Pred: I think this should be `array.length - 1`\n", + "Fine Tuned Pred: Wouldn't it be clearer to name this `array`?\n", + "-------------------\n", + "@@ -18,7 +18,6 @@ import { IWorkspaceContextService } from 'vs/platform/workspace/common/workspace\n", + " import { IWorkbenchContribution, IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions } from 'vs/workbench/common/contributions';\n", + "HF Pred: import { IWorkbenchContrib\n", + "Fine Tuned Pred: Are these imports used somewhere?\n", + "-------------------\n", + "@@ -74,6 +74,7 @@ export interface INextEditorGroupsAccessor {\n", + " \tgetGroup(identifier: GroupIdentifier): INextEditorGroupView;\n", + " \n", + " \taddGroup(location: INextEditorGroupView | GroupIdentifier, direction: GroupDirection, options?: IAddGroupOptions): INextEditorGroup;\n", + "+ removeGroup(): void;\n", + "HF Pred: removeGroup(identifier: GroupIdentifier);\n", + "Fine Tuned Pred: isn't this a breaking change?\n" + ] + } + ], + "source": [ + "df['vscode'] = analyze_preds('../data/microsoft_vscode_1000.csv')" + ] }, { "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "df['kotlin'] = analyze_preds('../data/JetBrains_kotlin_1000.csv')" - ], + "execution_count": 7, + "id": "d6f7b55edbe73e06", "metadata": { "collapsed": false }, - "id": "d6f7b55edbe73e06" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------\n", + "@@ -178,7 +181,7 @@ abstract class AbstractDebugTest : CodegenTestCase() {\n", + " var inBoxMethod = false\n", + " vmLoop@\n", + " while (true) {\n", + "- val eventSet = virtualMachine.eventQueue().remove(1000)\n", + "+ val eventSet = virtualMachine.eventQueue().remove(1000) ?: continue\n", + "HF Pred: Why is this change needed?\n", + "Fine Tuned Pred: This is the fix for the bug described in th\n", + "-------------------\n", + "@@ -3,8 +3,8 @@ package kara.internal\n", + " /* Test somthing */\n", + " class MoreeFun {\n", + " fun wrte() {\n", + "- val childen = 12\n", + "- val come = childen\n", + "+ val children = 12\n", + "HF Pred: Why did you remove this\n", + "Fine Tuned Pred: I think this should be a warning instead of a warning.\n", + "-------------------\n", + "@@ -0,0 +1,16 @@\n", + "+// IGNORE_BACKEND: JVM\n", + "+// KOTLIN_CONFIGURATION_FLAGS: ASSERTIONS_MODE=jvm\n", + "+\n", + "+class Outer {\n", + "+ class Inner {\n", + "+ fun f() { assert(true) }\n", + "+ }\n", + "+}\n", + "+\n", + "+// We set the assertion status based on top-level classes.\n", + "+// 0 LDC LOuter\\$Inner;.class\n", + "+// Outer\\$Inner.:\n", + "+// 1 LDC LOuter;.class\\s*INVOKEVIRTUAL java/lang/Class.desiredAssertionStatus \\(\\)Z\n", + "+// 1 PUTSTATIC Outer\\$Inner.\\$assertionsDisabled : Z\n", + "+// Outer\\$Inner.f:\n", + "+// 1 GETSTATIC Outer\\$Inner.\\$assertionsDisabled\n", + "HF Pred: I don't think this is the right place to put this code\n", + "Fine Tuned Pred: Is there a reason why this doesn't 
go in `src/\n", + "-------------------\n", + "@@ -247,7 +245,7 @@ internal abstract class IndicesHandler(protected val context: CommonBackendConte\n", + " \n", + " internal class ArrayIndicesHandler(context: CommonBackendContext) : IndicesHandler(context) {\n", + "HF Pred: }\n", + "Fine Tuned Pred: Should be `ArrayBackend` instead of `CommonBackendContext`.\n", + "-------------------\n", + "@@ -0,0 +1,17 @@\n", + "+// !DIAGNOSTICS: -UNUSED_PARAMETER\n", + "+class C {\n", + "+ [kotlin.jvm.overloads] private fun foo(s: String = \"OK\") {\n", + "+ }\n", + "+}\n", + "+\n", + "+fun foo() {\n", + "+ class D {\n", + "+ [kotlin.jvm.overloads] fun foo(s: String = \"OK\") {\n", + "+ }\n", + "+ }\n", + "+\n", + "+ val x = object {\n", + "+ [kotlin.jvm.overloads] fun foo(s: String = \"OK\") {\n", + "+ }\n", + "+ }\n", + "+}\n", + "HF Pred: I think we can remove `` here.\n", + "Fine Tuned Pred: Shouldn't this be `-UNUSED_PARAMETER`?\n" + ] + } + ], + "source": [ + "df['kotlin'] = analyze_preds('../data/JetBrains_kotlin_1000.csv')" + ] }, { "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "df['uppy'] = analyze_preds('../data/transloadit_uppy_1000.csv')" - ], + "execution_count": 8, + "id": "b3a70e469063ec64", "metadata": { "collapsed": false }, - "id": "b3a70e469063ec64" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------\n", + "@@ -1,6 +1,6 @@\n", + " import { expectError, expectType } from 'tsd'\n", + " import DefaultStore from '@uppy/store-default'\n", + "-import Uppy, { UIPlugin } from '..'\n", + "+import Uppy, { SuccessResponse, UIPlugin, UppyFile } from '..'\n", + " import type { UploadedUppyFile, FailedUppyFile, PluginOptions } from '..'\n", + "HF Pred: Why do we need to ex\n", + "Fine Tuned Pred: What's the reason for this change?\n", + "-------------------\n", + "@@ -1076,6 +1076,7 @@ class Uppy {\n", + " const currentProgress = this.getFile(file.id).progress\n", + " this.setFileState(file.id, {\n", + " progress: Object.assign({}, currentProgress, {\n", + "+ postprocess: this.postProcessors.length > 0,\n", + "HF Pred: postprocess: this.postProcessors.length > 0,\n", + "Fine Tuned Pred: Why don't we use `getPostProcessors` here as w\n", + "-------------------\n", + "@@ -681,6 +681,8 @@ module.exports = class Tus extends Plugin {\n", + " this.uppy.on('reset-progress', this.handleResetProgress)\n", + " \n", + " if (this.opts.autoRetry) {\n", + "+ this.uppy.log('[Tus] The `autoRetry` option may be removed in Uppy 2.0. See https://github.com/transloadit/uppy/pull/2347 for alternatives.', 'warning')\n", + "HF Pred: this.log('[Tus] The `autoRetry` option may b\n", + "Fine Tuned Pred: This link is broken right now.\n", + "-------------------\n", + "@@ -142,6 +142,7 @@ module.exports = class DragDrop extends Plugin {\n", + " const restrictions = this.uppy.opts.restrictions\n", + " return (\n", + " CodeReviewer Model Inference +

+

1 Tokenizers and Datasets#

@@ -376,6 +382,15 @@

1 Tokenizers and Datasets +
Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]
+Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]
+Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]
+Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]
+Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]
+
+
+

@@ -497,6 +523,20 @@

Fine-tuned CodeReviewer +
+
Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]
+Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] 
+Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']
+- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
+- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
+Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]
+100%|██████████| 636/636 [15:58<00:00,  1.51s/it]
+100%|██████████| 63/63 [01:41<00:00,  1.61s/it]
+100%|██████████| 63/63 [01:32<00:00,  1.47s/it]
+100%|██████████| 63/63 [01:27<00:00,  1.39s/it]
+
+
+

diff --git a/notebooks/3_evaluation.html b/notebooks/3_evaluation.html
index fb261a1..443c9ff 100644
--- a/notebooks/3_evaluation.html
+++ b/notebooks/3_evaluation.html
@@ -342,7 +342,7 @@

Contents

Predictions Evaluation#

-
-
def analyze_preds(base_file, sample_size=5):
+
def analyze_preds(base_file, sample_size=5):
     # read files
     hf_preds_file = Path(base_file).with_suffix('.hf_pred.csv')
     fine_tuned_file = Path(base_file).with_suffix('.finetuned_pred.csv')
@@ -376,7 +382,7 @@ 

Predictions Evaluation
-
def calc_bleu(df):
+
def calc_bleu(df):
     refs = list(df['target'])
     preds = list(df['prediction'])
     for i in range(len(preds)):
@@ -410,73 +416,381 @@ 

Qualitative Evaluation
We will print the code, the prediction of the HF model and the prediction of the fine-tuned model.

-
df = {}
+
df = {}
 
-
df['msg'] = analyze_preds('../data/msg-test')
+
df['msg'] = analyze_preds('../data/msg-test')
+
+
+
+
+
-------------------
+@@ -20,10 +20,6 @@ from pylint import checkers, interfaces
+ from pylint.checkers import utils
+
+
+-def _is_constant_empty_str(node):
+-    return isinstance(node, nodes.Const) and node.value == ""
+-
+-
+ class CompareToEmptyStringChecker(checkers.BaseChecker):
+     """Checks for comparisons to empty string.
+     Most of the times you should use the fact that empty strings are false.
+HF Pred: Why is this removed?
+Fine Tuned Pred: Removing this helper function because it isn't used anywhere else and `_is_constant_empty_str` is used in other places.
+-------------------
+@@ -14,7 +14,7 @@ import net.sourceforge.pmd.RuleViolation;
+ /**
+  * A {@link RuleViolation} implementation that is immutable, and therefore cache friendly
+  */
+-public final class CachedRuleViolation implements RuleViolation {
++public class CachedRuleViolation implements RuleViolation {
+
+     private final CachedRuleMapper mapper;
+ 
+HF Pred: Why did you remove `final`
+Fine Tuned Pred: Why did you remove the final modif
+-------------------
+@@ -225,6 +225,18 @@ class BokehPlot(DimensionedPlot):
+                     source.data.update(converted_data)
+                 else:
+                     source.stream(data, stream.length)
++            return
++
++        # Determine if the CDS.data requires a full update or simply needs
++        # to be updated, this is done by determining whether newly added
++        # or not updated columns have the same length
++        new_length = [len(v) for v in data.values() if isinstance(v, (list, np.ndarray))]
++        length = [len(v) for v in source.data.values() if isinstance(v, (list, np.ndarray))]
++        not_updated = [k for k in source.data if k not in data]
++        new = [k for k in data if k not in source.data]
++        if ((not_updated and new_length and any(len(source.data[n]) != new_length[0] for n in not_updated))
++            or (new and length and any(len(data[n]) != length[0] for n in new))):
++            source.data = data
+         else:
+             source.data.update(data)
+ 
+HF Pred: I don't think this is the right way to do this. If the length o
+Fine Tuned Pred: @Eric89GXL @mluessi do we need to check 
+-------------------
+@@ -128,7 +128,7 @@ public class RestRequest {
+ 	 *
+ 	 * @param method				the HTTP method for the request (GET/POST/DELETE etc)
+ 	 * @param path					the URI path, this will automatically be resolved against the users current instance host.
+-	 * @param httpEntity			the request body if there is one, can be null.
++	 * @param requestEntity			the request body if there is one, can be null.
+ 	 */
+ 	public RestRequest(RestMethod method, String path, HttpEntity requestEntity) {
+ 		this(method, path, requestEntity, null);
+HF Pred: Why did you change this?
+Fine Tuned Pred: This change should be
+-------------------
+@@ -37,6 +37,12 @@ module Selenium
+         def bridge_class
+           Bridge
+         end
++
++        def print_page(**options)
++          options[:page_ranges] &&= Array(options[:page_ranges])
++
++          bridge.print_page(options)
++        end
+       end # Driver
+     end # Firefox
+   end # WebDriver
+HF Pred: Extra empty line de
+Fine Tuned Pred: Use 2 (not 0) spaces for indenting an expre
 
-
df['vscode'] = analyze_preds('../data/microsoft_vscode_1000.csv')
+
df['vscode'] = analyze_preds('../data/microsoft_vscode_1000.csv')
+
+
+
+
+
-------------------
+@@ -1127,4 +1134,12 @@ registry.add('eslint-stylish', {
+ 	applyTo: ApplyToKind.allDocuments,
+ 	fileLocation: FileLocationKind.Absolute,
+ 	pattern: defaultPattern('eslint-stylish')
+-});
+\ No newline at end of file
++});
++
++registry.add('go', {
++	owner: 'typescript',
++	applyTo: ApplyToKind.allDocuments,
++	fileLocation: FileLocationKind.Relative,
++	filePrefix: '${cwd}',
+HF Pred: Why is this needed?
+Fine Tuned Pred: I am not sure if this is a good idea. It makes me wonder if we shoul
+-------------------
+@@ -0,0 +1,46 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<component type="desktop">
++	<id>@@NAME@@.desktop</id>
++	<metadata_license>CC-BY-SA-3.0</metadata_license>
++	<project_license>MIT</project_license>
++	<name>@@NAME_LONG@@</name>
++	<url type="homepage">https://code.visualstudio.com</url>
++	<summary>Code editor for developers supporting integration with existing tools</summary>
++	<description>
++		<p>
++			Visual Studio Code is a lightweight but powerful source code editor which
+HF Pred: I think this should be `@@NA
+Fine Tuned Pred: We don't need this file, it's only 
+-------------------
+@@ -171,7 +171,7 @@ export function distinct<T>(array: T[], keyFn?: (t: T) => string): T[] {
+ }
+ 
+ export function first<T>(array: T[], fn: (item: T) => boolean, notFoundValue: T = null): T {
+-	for (let i = 0; i < array.length; i++) {
++	for (let i = 0, I = array.length; i < I; i++) {
+ 		const element = array[i];
+HF Pred: I think this should be `array.length - 1`
+Fine Tuned Pred: Wouldn't it be clearer to name this `array`?
+-------------------
+@@ -18,7 +18,6 @@ import { IWorkspaceContextService } from 'vs/platform/workspace/common/workspace
+ import { IWorkbenchContribution, IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions } from 'vs/workbench/common/contributions';
+HF Pred: import { IWorkbenchContrib
+Fine Tuned Pred: Are these imports used somewhere?
+-------------------
+@@ -74,6 +74,7 @@ export interface INextEditorGroupsAccessor {
+ 	getGroup(identifier: GroupIdentifier): INextEditorGroupView;
+ 
+ 	addGroup(location: INextEditorGroupView | GroupIdentifier, direction: GroupDirection, options?: IAddGroupOptions): INextEditorGroup;
++  removeGroup(): void;
+HF Pred: removeGroup(identifier: GroupIdentifier);
+Fine Tuned Pred: isn't this a breaking change?
 
-
df['kotlin'] = analyze_preds('../data/JetBrains_kotlin_1000.csv')
+
df['kotlin'] = analyze_preds('../data/JetBrains_kotlin_1000.csv')
+
+
+
+
+
-------------------
+@@ -178,7 +181,7 @@ abstract class AbstractDebugTest : CodegenTestCase() {
+         var inBoxMethod = false
+         vmLoop@
+         while (true) {
+-            val eventSet = virtualMachine.eventQueue().remove(1000)
++            val eventSet = virtualMachine.eventQueue().remove(1000) ?: continue
+HF Pred: Why is this change needed?
+Fine Tuned Pred: This is the fix for the bug described in th
+-------------------
+@@ -3,8 +3,8 @@ package kara.internal
+ /* Test <TYPO descr="Typo: In word 'somthing'">somthing</TYPO> */
+ class <TYPO descr="Typo: In word 'Moree'">Moree</TYPO>Fun {
+   fun <TYPO descr="Typo: In word 'wrte'">wrte</TYPO>() {
+-    val <TYPO descr="Typo: In word 'childen'">childen</TYPO> = 12
+-    val <warning descr="[UNUSED_VARIABLE] Variable 'come' is never used">come</warning> = childen
++    val <TYPO descr="Typo: In word 'children'">children</TYPO> = 12
+HF Pred: Why did you remove this
+Fine Tuned Pred: I think this should be a warning instead of a warning.
+-------------------
+@@ -0,0 +1,16 @@
++// IGNORE_BACKEND: JVM
++// KOTLIN_CONFIGURATION_FLAGS: ASSERTIONS_MODE=jvm
++
++class Outer {
++    class Inner {
++        fun f() { assert(true) }
++    }
++}
++
++// We set the assertion status based on top-level classes.
++// 0 LDC LOuter\$Inner;.class
++// Outer\$Inner.<clinit>:
++// 1 LDC LOuter;.class\s*INVOKEVIRTUAL java/lang/Class.desiredAssertionStatus \(\)Z
++// 1 PUTSTATIC Outer\$Inner.\$assertionsDisabled : Z
++// Outer\$Inner.f:
++// 1 GETSTATIC Outer\$Inner.\$assertionsDisabled
+HF Pred: I don't think this is the right place to put this code
+Fine Tuned Pred: Is there a reason why this doesn't go in `src/
+-------------------
+@@ -247,7 +245,7 @@ internal abstract class IndicesHandler(protected val context: CommonBackendConte
+ 
+ internal class ArrayIndicesHandler(context: CommonBackendContext) : IndicesHandler(context) {
+HF Pred: }
+Fine Tuned Pred: Should be `ArrayBackend` instead of `CommonBackendContext`.
+-------------------
+@@ -0,0 +1,17 @@
++// !DIAGNOSTICS: -UNUSED_PARAMETER
++class C {
++    <!OVERLOADS_PRIVATE!>[kotlin.jvm.overloads] private fun foo(s: String = "OK")<!> {
++    }
++}
++
++fun foo() {
++    class D {
++        <!OVERLOADS_PRIVATE!>[kotlin.jvm.overloads] fun foo(s: String = "OK")<!> {
++        }
++    }
++
++    val <!UNUSED_VARIABLE!>x<!> = object {
++        <!OVERLOADS_PRIVATE!>[kotlin.jvm.overloads] fun foo(s: String = "OK")<!> {
++    }
++    }
++}
+HF Pred: I think we can remove `<!UNUSED_VARIABLE!>` here.
+Fine Tuned Pred: Shouldn't this be `-UNUSED_PARAMETER`?
 
-
df['uppy'] = analyze_preds('../data/transloadit_uppy_1000.csv')
+
df['uppy'] = analyze_preds('../data/transloadit_uppy_1000.csv')
+
+
+
+
+
-------------------
+@@ -1,6 +1,6 @@
+ import { expectError, expectType } from 'tsd'
+ import DefaultStore from '@uppy/store-default'
+-import Uppy, { UIPlugin } from '..'
++import Uppy, { SuccessResponse, UIPlugin, UppyFile } from '..'
+ import type { UploadedUppyFile, FailedUppyFile, PluginOptions } from '..'
+HF Pred: Why do we need to ex
+Fine Tuned Pred: What's the reason for this change?
+-------------------
+@@ -1076,6 +1076,7 @@ class Uppy {
+       const currentProgress = this.getFile(file.id).progress
+       this.setFileState(file.id, {
+         progress: Object.assign({}, currentProgress, {
++          postprocess: this.postProcessors.length > 0,
+HF Pred: postprocess: this.postProcessors.length > 0,
+Fine Tuned Pred: Why don't we use `getPostProcessors` here as w
+-------------------
+@@ -681,6 +681,8 @@ module.exports = class Tus extends Plugin {
+     this.uppy.on('reset-progress', this.handleResetProgress)
+ 
+     if (this.opts.autoRetry) {
++      this.uppy.log('[Tus] The `autoRetry` option may be removed in Uppy 2.0. See https://github.com/transloadit/uppy/pull/2347 for alternatives.', 'warning')
+HF Pred: this.log('[Tus] The `autoRetry` option may b
+Fine Tuned Pred: This link is broken right now.
+-------------------
+@@ -142,6 +142,7 @@ module.exports = class DragDrop extends Plugin {
+     const restrictions = this.uppy.opts.restrictions
+     return (
+       <input
++        id={'uppy-input-' + this.id}
+HF Pred: const input
+Fine Tuned Pred: Does it matter that this is prefixed with `uppy-`?
+-------------------
+@@ -41,7 +41,7 @@ ar_SA.strings = {
+   encoding: 'التشفير...',
+   enterCorrectUrl: 'خطأ في الرابط: ارجو التأكد من ادخال رابط مباشر للملف',
+   enterUrlToImport: 'ادخل الرابط لاستيراد الملفات',
+-  exceedsSize: 'الملف اكبر من الحجم المسموح',
++  exceedsSize: 'الملف اكبر من الحجم المسموح %{size}',
+HF Pred: e
+Fine Tuned Pred: Can you provide the reason for this change?
 

As we can see, the fine-tuned model produces better predictions than the HF model: they are noticeably more insightful and detailed. The HF model tends toward generic comments, while the fine-tuned model's comments are more specific to the code under review.

+

One oddity remains: the CodeReviewer model's predictions are sometimes cut off and not shown in full. This is not caused by the sequence-length limit, which is fairly generous (512 tokens). Tracking down the actual cause is left to future work.
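A minimal diagnostic sketch (an editorial addition, not part of the original notebooks): assuming the `tokenizer` and a batch of generated ids `preds` from the inference notebook, one could check whether a comment was cut because the decoder exhausted its length budget or whether it stopped on its own. The helper name and the 512-token default are illustrative assumptions.

import torch

def truncation_report(preds: torch.Tensor, tokenizer, length_budget: int = 512):
    # sequences that emitted an end-of-sequence token terminated on their own
    hit_eos = (preds == tokenizer.eos_token_id).any(dim=1)
    # sequences whose non-padding length reaches the budget were likely truncated
    lengths = (preds != tokenizer.pad_token_id).sum(dim=1)
    hit_budget = lengths >= length_budget
    print(f"terminated with EOS: {hit_eos.sum().item()}/{len(preds)}")
    print(f"hit length budget:   {hit_budget.sum().item()}/{len(preds)}")

If most sequences end with an EOS token well before the budget, the cut happens downstream (for example during decoding or display) rather than in generation.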

Quantitative Evaluation#

For each dataset, we calculate the BLEU-4 score for the predictions of the HF model and the fine-tuned model. BLEU measures the n-gram overlap between a prediction and its reference (the human-written review comment); the higher the score, the closer the prediction is to the target.
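The notebook computes this with `bleu_fromstr` from `utils.smooth_bleu`. As a rough equivalent (a hedged sketch, not the notebook's exact metric), a smoothed corpus-level BLEU-4 over whitespace-tokenized strings can be computed with NLTK; the `target` and `prediction` column names follow the dataframe used in `calc_bleu` above, while the helper name `bleu4` is an assumption.

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

def bleu4(refs, preds):
    # one reference per prediction, plain whitespace tokenization
    refs_tok = [[r.split()] for r in refs]
    preds_tok = [p.split() for p in preds]
    # default weights give BLEU-4; scale to a 0-100 range like the notebook's scores
    return 100 * corpus_bleu(refs_tok, preds_tok,
                             smoothing_function=SmoothingFunction().method2)

# usage on one of the prediction dataframes:
# score = bleu4(list(df['target']), list(df['prediction']))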

-
calc_bleu_score('../data/msg-test')
+
calc_bleu_score('../data/msg-test')
+
+
+
+
+
Total: 10169
+Total: 10169
+
+
+
HF BLEU: 4.25
+Fine Tuned BLEU: 4.71
+
+
+
(4.25, 4.71)
 
-
calc_bleu_score('../data/microsoft_vscode_1000.csv')
+
calc_bleu_score('../data/microsoft_vscode_1000.csv')
+
+
+
+
+
Total: 1000
+Total: 1000
+
+
+
HF BLEU: 2.39
+Fine Tuned BLEU: 3.41
+
+
+
(2.39, 3.41)
 
-
calc_bleu_score('../data/JetBrains_kotlin_1000.csv')
+
calc_bleu_score('../data/JetBrains_kotlin_1000.csv')
+
+
+
+
+
Total: 1000
+Total: 1000
+
+
+
HF BLEU: 2.71
+Fine Tuned BLEU: 3.91
+
+
+
(2.71, 3.91)
 
-
calc_bleu_score('../data/transloadit_uppy_1000.csv')
+
calc_bleu_score('../data/transloadit_uppy_1000.csv')
+
+
+
+
+
Total: 1000
+Total: 1000
+
+
+
HF BLEU: 3.25
+Fine Tuned BLEU: 3.84
+
+
+
(3.25, 3.84)
 
-

As we can see, the fine-tuned model performs better than the HF model on all datasets. The semantic value of the predictions is also better, as we can see in the qualitative evaluation.

+

As we can see, the fine-tuned model performs better than the HF model on all datasets. Nevertheless, the scores are still quite low, which confirms that, as the authors put it, “it is a hard task”.

+

The semantic value of the predictions is also better, as we can see in the qualitative evaluation.

diff --git a/searchindex.js b/searchindex.js index cfa14a1..11a4099 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["README", "docs/conclusion", "docs/intro", "notebooks/1_collect_reviews", "notebooks/2_inference", "notebooks/3_evaluation"], "filenames": ["README.md", "docs/conclusion.md", "docs/intro.md", "notebooks/1_collect_reviews.ipynb", "notebooks/2_inference.ipynb", "notebooks/3_evaluation.ipynb"], "titles": ["CodeReviewer ML Performance", "<no title>", "CodeReviewer ML Performance", "Collecting Code Review Data", "CodeReviewer Model Inference", "Predictions Evaluation"], "terms": {"thi": [2, 3], "small": 2, "sampl": [2, 5], "book": 2, "give": 2, "you": [2, 3], "feel": 2, "how": [2, 5], "structur": 2, "It": 2, "show": 2, "off": 2, "few": 2, "major": 2, "file": [2, 3, 5], "type": 2, "well": 2, "some": 2, "doe": 2, "go": 2, "depth": 2, "ani": 2, "particular": 2, "topic": 2, "check": 2, "out": 2, "jupyt": [2, 3], "document": 2, "more": [2, 5], "inform": 2, "page": 2, "bundl": 2, "see": [2, 5], "collect": 2, "code": [2, 4, 5], "review": [2, 4], "data": [2, 5], "model": [2, 5], "infer": 2, "predict": 2, "evalu": 2, "p4v": [2, 4], "codebert": 2, "hug": 2, "face": 2, "space": 2, "p4vv37": 2, "url": 2, "http": [2, 4], "huggingfac": 2, "co": [2, 4], "codebert_codereview": 2, "visit": 2, "2023": 2, "09": 2, "13": 2, "llg": [2, 3, 4], "22": [2, 3, 4], "zhiyu": 2, "li": 2, "shuai": 2, "lu": 2, "daya": 2, "guo": 2, "nan": [2, 5], "duan": 2, "shailesh": 2, "jannu": 2, "grant": 2, "jenk": 2, "deep": 2, "majumd": 2, "jare": 2, "green": 2, "alexei": 2, "svyatkovskii": 2, "shengyu": 2, "fu": 2, "other": [2, 3], "pre": 2, "train": 2, "autom": 2, "activ": 2, "arxiv": 2, "preprint": 2, "2203": 2, "09095": 2, "2022": 2, "In": 3, "notebook": 3, "we": [3, 4, 5], "from": [3, 4, 5], "github": 3, "us": [3, 4], "pygithub": 3, "librari": 3, "interact": 3, "api": 3, "getpass": 3, "import": [3, 4, 5], "auth": 3, "panda": [3, 4, 5], "pd": [3, 4, 5], "tqdm": [3, 4], "autonotebook": [3, 4], "c": [3, 5], "user": 3, "akovr": 3, "appdata": 3, "local": 3, "temp": 3, "ipykernel_15472": 3, "323726258": 3, "py": 3, "5": [3, 4, 5], "tqdmexperimentalwarn": 3, "mode": 3, "instead": 3, "forc": 3, "consol": 3, "e": 3, "g": 3, "although": 3, "can": [3, 4, 5], "without": 3, "authent": 3, "need": 3, "increas": 3, "rate": 3, "limit": 3, "access": 3, "token": 3, "enter": 3, "below": 3, "If": 3, "do": 3, "run": 3, "60": 3, "request": 3, "per": 3, "hour": [3, 4], "your": 3, "els": 3, "warn": 3, "possibl": 3, "next": 3, "defin": 3, "function": 3, "repositori": 3, "def": [3, 4, 5], "collect_review": 3, "repo_nam": 3, "str": 3, "num_com": 3, "int": 3, "1000": 3, "skip_author": 3, "true": [3, 4, 5], "allow_thread": 3, "fals": [3, 4, 5], "save": 3, "max_length": [3, 4], "512": [3, 4], "crawl": 3, "repo": 3, "param": 3, "name": 3, "format": 3, "owner": 3, "number": 3, "comment": 3, "load": 3, "skip": 3, "made": [3, 4], "author": [3, 4], "pull": 3, "allow": 3, "ar": [3, 5], "repli": 3, "csv": [3, 4, 5], "maximum": 3, "length": 3, "diff": 3, "hunk": 3, "return": [3, 4, 5], "datafram": [3, 4, 5], "column": 3, "diff_hunk": [3, 4], "human_review": [3, 4], "created_at": 3, "count": 3, "set": 3, "get_repo": 3, "comment_pag": 3, "get_pulls_review_com": 3, "iter": 3, "over": 3, "progress_bar": 3, "total": 3, "len": [3, 4, 5], "have": [3, 4], "enough": 3, "stop": 3, "break": 3, "alreadi": 3, "continu": 3, "too": 3, "long": [3, 4], "get": 3, "commit": 3, "commit_author": 3, "get_git_commit": 3, "commit_id": 
3, "add": [3, 4], "along": 3, "ground": 3, "truth": 3, "append": [3, 4], "bodi": 3, "updat": 3, "1": 3, "df": [3, 4, 5], "remov": 3, "keep": 3, "first": 3, "loc": 3, "groupbi": 3, "idxmin": 3, "to_csv": [3, 4], "f": [3, 5], "replac": [3, 5], "_": [3, 5], "final": 3, "follow": [3, 4], "microsoft": [3, 4], "vscode": [3, 5], "jetbrain": 3, "kotlin": [3, 5], "transloadit": 3, "uppi": [3, 5], "i": [3, 4, 5], "chosen": 3, "becaus": 3, "thei": 3, "popular": 3, "larg": 3, "The": [3, 4, 5], "also": [3, 5], "similar": [3, 5], "criteria": 3, "select": 3, "studi": 3, "folder": 3, "addition": 3, "test": [3, 4, 5], "dataset": [3, 5], "zenodo": 3, "avail": [3, 4], "msg": [3, 4, 5], "let": 4, "s": 4, "gener": [4, 5], "pathlib": [4, 5], "path": [4, 5], "numpi": [4, 5], "np": [4, 5], "torch": 4, "util": [4, 5], "dataload": 4, "transform": 4, "autotoken": 4, "automodelforseq2seqlm": 4, "p": 4, "enorm": 4, "thank": 4, "provid": 4, "open": 4, "sourc": 4, "work": 4, "download": 4, "from_pretrain": 4, "requir": 4, "special": 4, "process_token": 4, "class": 4, "reviewsdataset": 4, "__init__": 4, "self": 4, "y": 4, "x": 4, "tensor": 4, "appli": 4, "lambda": 4, "row": 4, "encode_diff": 4, "axi": 4, "dtype": 4, "cpu": 4, "__len__": 4, "__getitem__": 4, "idx": 4, "here": 4, "creat": 4, "each": [4, 5], "project": 4, "filenam": 4, "jetbrains_kotlin_1000": [4, 5], "microsoft_vscode_1000": [4, 5], "transloadit_uppy_1000": [4, 5], "read_csv": [4, 5], "batch_siz": 4, "16": 4, "shuffl": 4, "6": 4, "8gb": 4, "gpu": 4, "now": [4, 5], "two": 4, "devic": 4, "cuda": 4, "eval": 4, "result": 4, "inputs_mask": 4, "ne": 4, "pad_id": 4, "pred": [4, 5], "attention_mask": 4, "use_cach": 4, "num_beam": 4, "early_stop": 4, "num_return_sequ": 4, "decod": 4, "preds_np": 4, "detach": 4, "preds_decod": 4, "apply_along_axi": 4, "skip_special_token": 4, "clean_up_tokenization_spac": 4, "list": [4, 5], "hub": 4, "pretrain": 4, "hf_model": 4, "zip": 4, "df_pred": 4, "target": [4, 5], "with_suffix": [4, 5], "hf_pred": [4, 5], "head": 4, "task": 4, "instruct": 4, "For": [4, 5], "paramet": 4, "learning_r": 4, "3e": 4, "4": [4, 5], "max_source_length": 4, "took": 4, "about": 4, "12": 4, "singl": 4, "nvidia": 4, "geforc": 4, "a100": 4, "wa": 4, "epoch": 4, "waleko": 4, "finetun": 4, "ft_model": 4, "finetuned_pr": [4, 5], "smooth_bleu": 5, "bleu_fromstr": 5, "analyze_pr": 5, "base_fil": 5, "sample_s": 5, "read": 5, "hf_preds_fil": 5, "fine_tuned_fil": 5, "fine_tun": 5, "put": 5, "fine_tuned_pr": 5, "regex": 5, "print": 5, "to_numpi": 5, "hf": 5, "fine": 5, "tune": 5, "calc_bleu": 5, "ref": 5, "rang": 5, "char": 5, "join": 5, "split": 5, "rmstop": 5, "calc_bleu_scor": 5, "ft_pred": 5, "inplac": 5, "hf_bleu": 5, "ft_bleu": 5, "bleu": 5, "compar": 5, "four": 5, "As": 5, "produc": 5, "better": 5, "than": 5, "much": 5, "insight": 5, "detail": 5, "tend": 5, "while": 5, "specif": 5, "calcul": 5, "score": 5, "measur": 5, "higher": 5, "perform": 5, "all": 5, "semant": 5, "valu": 5}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"codereview": [0, 2, 4], "ml": [0, 2], "perform": [0, 2], "tabl": 2, "content": 2, "bibliographi": 2, "collect": 3, "code": 3, "review": 3, "data": [3, 4], "model": 4, "infer": 4, "1": 4, "token": 4, "dataset": 4, "2": 4, "load": 4, "3": 4, "predict": [4, 5], "function": 4, "huggingfac": 4, "pre": 4, "train": 4, "checkpoint": 4, "fine": 4, "tune": 4, "evalu": 5, "qualit": 5, "quantit": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, 
"sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["README", "docs/conclusion", "docs/intro", "notebooks/1_collect_reviews", "notebooks/2_inference", "notebooks/3_evaluation"], "filenames": ["README.md", "docs/conclusion.md", "docs/intro.md", "notebooks/1_collect_reviews.ipynb", "notebooks/2_inference.ipynb", "notebooks/3_evaluation.ipynb"], "titles": ["CodeReviewer ML Performance", "<no title>", "CodeReviewer ML Performance", "Collecting Code Review Data", "CodeReviewer Model Inference", "Predictions Evaluation"], "terms": {"thi": [2, 3, 4, 5], "small": 2, "sampl": [2, 5], "book": 2, "give": 2, "you": [2, 3, 4, 5], "feel": 2, "how": [2, 5], "structur": 2, "It": [2, 5], "show": 2, "off": 2, "few": 2, "major": 2, "file": [2, 3, 5], "type": [2, 5], "well": 2, "some": [2, 4], "doe": [2, 5], "go": [2, 5], "depth": 2, "ani": [2, 5], "particular": 2, "topic": 2, "check": [2, 5], "out": [2, 5], "jupyt": [2, 3, 4, 5], "document": 2, "more": [2, 5], "inform": 2, "page": 2, "bundl": 2, "see": [2, 4, 5], "collect": 2, "code": [2, 4, 5], "review": [2, 4], "data": [2, 5], "model": [2, 5], "infer": 2, "predict": 2, "evalu": 2, "p4v": [2, 4], "codebert": 2, "hug": 2, "face": 2, "space": [2, 5], "p4vv37": 2, "url": [2, 5], "http": [2, 4, 5], "huggingfac": 2, "co": [2, 4], "codebert_codereview": 2, "visit": 2, "2023": 2, "09": 2, "13": 2, "llg": [2, 3, 4], "22": [2, 3, 4, 5], "zhiyu": 2, "li": 2, "shuai": 2, "lu": 2, "daya": 2, "guo": 2, "nan": [2, 5], "duan": 2, "shailesh": 2, "jannu": 2, "grant": 2, "jenk": 2, "deep": 2, "majumd": 2, "jare": 2, "green": 2, "alexei": 2, "svyatkovskii": 2, "shengyu": 2, "fu": 2, "other": [2, 3, 5], "pre": 2, "train": 2, "autom": 2, "activ": 2, "arxiv": 2, "preprint": 2, "2203": 2, "09095": 2, "2022": 2, "In": [3, 5], "notebook": 3, "we": [3, 4, 5], "from": [3, 4, 5], "github": [3, 5], "us": [3, 4, 5], "pygithub": 3, "librari": 3, "interact": 3, "api": 3, "getpass": 3, "import": [3, 4, 5], "auth": 3, "panda": [3, 4, 5], "pd": [3, 4, 5], "tqdm": [3, 4, 5], "autonotebook": [3, 4, 5], "c": [3, 5], "user": [3, 5], "akovr": 3, "appdata": 3, "local": 3, "temp": 3, "ipykernel_15472": 3, "323726258": 3, "py": [3, 4, 5], "5": [3, 4, 5], "tqdmexperimentalwarn": 3, "mode": 3, "instead": [3, 5], "forc": 3, "consol": 3, "e": [3, 4, 5], "g": [3, 4], "although": 3, "can": [3, 4, 5], "without": 3, "authent": 3, "need": [3, 5], "increas": 3, "rate": 3, "limit": [3, 5], "access": 3, "token": [3, 5], "enter": 3, "below": 3, "If": [3, 5], "do": [3, 5], "run": 3, "60": 3, "request": [3, 5], "per": 3, "hour": [3, 4], "your": 3, "els": [3, 5], "warn": [3, 5], "possibl": 3, "next": 3, "defin": 3, "function": [3, 5], "repositori": 3, "def": [3, 4, 5], "collect_review": 3, "repo_nam": 3, "str": 3, "num_com": 3, "int": 3, "1000": [3, 5], "skip_author": 3, "true": [3, 4, 5], "allow_thread": 3, "fals": [3, 4, 5], "save": 3, "max_length": [3, 4], "512": [3, 4, 5], "crawl": 3, "repo": 3, "param": [3, 5], "name": [3, 5], "format": 3, "owner": [3, 5], "number": 3, "comment": 3, "load": 3, "skip": 3, "made": [3, 4], "author": [3, 4, 5], "pull": [3, 5], "allow": 3, "ar": [3, 4, 5], "repli": 3, "csv": [3, 4, 5], "maximum": 3, "length": [3, 5], "diff": 3, "hunk": 3, "return": [3, 4, 5], "datafram": [3, 4, 5], "column": [3, 5], "diff_hunk": [3, 4], "human_review": [3, 4], 
"created_at": 3, "count": 3, "set": [3, 5], "get_repo": 3, "comment_pag": 3, "get_pulls_review_com": 3, "iter": 3, "over": 3, "progress_bar": 3, "total": [3, 5], "len": [3, 4, 5], "have": [3, 4, 5], "enough": 3, "stop": 3, "break": [3, 5], "alreadi": 3, "continu": [3, 5], "too": 3, "long": [3, 4], "get": [3, 5], "commit": 3, "commit_author": 3, "get_git_commit": 3, "commit_id": 3, "add": [3, 4, 5], "along": 3, "ground": 3, "truth": 3, "append": [3, 4], "bodi": [3, 5], "updat": [3, 4, 5], "1": [3, 5], "df": [3, 4, 5], "remov": [3, 5], "keep": 3, "first": [3, 5], "loc": 3, "groupbi": 3, "idxmin": 3, "to_csv": [3, 4], "f": [3, 5], "replac": [3, 5], "_": [3, 5], "final": [3, 5], "follow": [3, 4], "microsoft": [3, 4], "vscode": [3, 5], "jetbrain": 3, "kotlin": [3, 5], "transloadit": [3, 5], "uppi": [3, 5], "i": [3, 4, 5], "chosen": 3, "becaus": [3, 5], "thei": 3, "popular": 3, "larg": 3, "The": [3, 4, 5], "also": [3, 5], "similar": [3, 5], "criteria": 3, "select": 3, "studi": 3, "folder": 3, "addition": 3, "test": [3, 4, 5], "dataset": [3, 5], "zenodo": 3, "avail": [3, 4], "msg": [3, 4, 5], "let": [4, 5], "s": [4, 5], "gener": [4, 5], "pathlib": [4, 5], "path": [4, 5], "numpi": [4, 5], "np": [4, 5], "torch": 4, "util": [4, 5], "dataload": 4, "transform": 4, "autotoken": 4, "automodelforseq2seqlm": 4, "opt": [4, 5], "conda": [4, 5], "lib": [4, 5], "python3": [4, 5], "8": [4, 5], "site": [4, 5], "packag": [4, 5], "auto": [4, 5], "tqdmwarn": [4, 5], "iprogress": [4, 5], "found": [4, 5], "pleas": [4, 5], "ipywidget": [4, 5], "readthedoc": [4, 5], "io": [4, 5], "en": [4, 5], "stabl": [4, 5], "user_instal": [4, 5], "html": [4, 5], "notebook_tqdm": [4, 5], "p": [4, 5], "enorm": 4, "thank": 4, "provid": [4, 5], "open": 4, "sourc": [4, 5], "work": [4, 5], "download": 4, "from_pretrain": 4, "requir": [4, 5], "special": 4, "process_token": 4, "okenizer_config": 4, "json": 4, "100": 4, "29k": 4, "00": 4, "169kb": 4, "olv": 4, "main": 4, "vocab": 4, "575k": 4, "89mb": 4, "merg": 4, "txt": 4, "294k": 4, "09mb": 4, "added_token": 4, "87k": 4, "13mb": 4, "cial_tokens_map": 4, "913": 4, "515kb": 4, "class": [4, 5], "reviewsdataset": 4, "__init__": 4, "self": 4, "y": 4, "x": [4, 5], "tensor": 4, "appli": 4, "lambda": 4, "row": 4, "encode_diff": 4, "axi": 4, "dtype": 4, "cpu": 4, "__len__": 4, "__getitem__": 4, "idx": 4, "here": [4, 5], "creat": 4, "each": [4, 5], "project": 4, "filenam": 4, "jetbrains_kotlin_1000": [4, 5], "microsoft_vscode_1000": [4, 5], "transloadit_uppy_1000": [4, 5], "read_csv": [4, 5], "batch_siz": 4, "16": [4, 5], "shuffl": 4, "6": [4, 5], "8gb": 4, "gpu": 4, "now": [4, 5], "two": 4, "devic": 4, "cuda": 4, "eval": 4, "result": 4, "inputs_mask": 4, "ne": 4, "pad_id": 4, "pred": [4, 5], "attention_mask": 4, "use_cach": 4, "num_beam": 4, "early_stop": 4, "num_return_sequ": 4, "decod": 4, "preds_np": 4, "detach": 4, "preds_decod": 4, "apply_along_axi": 4, "skip_special_token": 4, "clean_up_tokenization_spac": 4, "list": [4, 5], "hub": 4, "pretrain": 4, "hf_model": 4, "zip": 4, "df_pred": 4, "target": [4, 5], "with_suffix": [4, 5], "hf_pred": [4, 5], "head": 4, "lve": 4, "config": 4, "13k": 4, "478kb": 4, "pytorch_model": 4, "bin": 4, "892m": 4, "02": 4, "306mb": 4, "neration_config": 4, "168": 4, "36": 4, "7kb": 4, "636": 4, "11": 4, "08": 4, "05": 4, "63": 4, "03": 4, "31": 4, "35": 4, "01": 4, "57": 4, "87": 4, "39": [4, 5], "53": 4, "task": [4, 5], "instruct": 4, "For": [4, 5], "paramet": 4, "learning_r": 4, "3e": 4, "4": [4, 5], "max_source_length": 4, "took": 4, "about": 4, "12": [4, 5], 
"singl": 4, "nvidia": 4, "geforc": 4, "a100": 4, "wa": 4, "epoch": 4, "waleko": 4, "finetun": 4, "ft_model": 4, "finetuned_pr": [4, 5], "290kb": 4, "4mb": 4, "weight": 4, "were": 4, "when": 4, "initi": 4, "t5forconditionalgener": 4, "cls_head": 4, "bia": 4, "IS": 4, "expect": 4, "anoth": 4, "architectur": 4, "bertforsequenceclassif": 4, "bertforpretrain": 4, "NOT": 4, "exactli": 4, "ident": 4, "49": 4, "2kb": 4, "15": 4, "58": 4, "51": 4, "41": [4, 5], "61": 4, "32": 4, "47": 4, "27": 4, "smooth_bleu": 5, "bleu_fromstr": 5, "analyze_pr": 5, "base_fil": 5, "sample_s": 5, "read": 5, "hf_preds_fil": 5, "fine_tuned_fil": 5, "fine_tun": 5, "put": 5, "fine_tuned_pr": 5, "regex": 5, "print": 5, "to_numpi": 5, "hf": 5, "fine": 5, "tune": 5, "calc_bleu": 5, "ref": 5, "rang": 5, "char": 5, "join": 5, "split": 5, "rmstop": 5, "calc_bleu_scor": 5, "ft_pred": 5, "inplac": 5, "hf_bleu": 5, "ft_bleu": 5, "bleu": 5, "compar": 5, "four": 5, "20": 5, "10": 5, "pylint": 5, "checker": 5, "interfac": 5, "_is_constant_empty_str": 5, "node": 5, "isinst": 5, "const": 5, "valu": 5, "comparetoemptystringcheck": 5, "basecheck": 5, "comparison": 5, "empti": 5, "string": 5, "most": 5, "time": 5, "should": 5, "fact": 5, "why": 5, "helper": 5, "isn": 5, "t": 5, "anywher": 5, "place": 5, "14": 5, "7": 5, "net": 5, "sourceforg": 5, "pmd": 5, "ruleviol": 5, "A": 5, "link": 5, "implement": 5, "immut": 5, "therefor": 5, "cach": 5, "friendli": 5, "public": 5, "cachedruleviol": 5, "privat": 5, "cachedrulemapp": 5, "mapper": 5, "did": 5, "modif": 5, "225": 5, "18": 5, "bokehplot": 5, "dimensionedplot": 5, "converted_data": 5, "stream": 5, "determin": 5, "cd": 5, "full": 5, "simpli": 5, "done": 5, "whether": 5, "newli": 5, "ad": 5, "same": 5, "new_length": 5, "v": 5, "ndarrai": 5, "not_upd": 5, "k": 5, "new": 5, "n": 5, "0": 5, "don": 5, "think": 5, "right": 5, "wai": 5, "o": 5, "eric89gxl": 5, "mluessi": 5, "128": 5, "restrequest": 5, "method": 5, "post": 5, "delet": 5, "etc": 5, "uri": 5, "automat": 5, "resolv": 5, "against": 5, "current": 5, "instanc": 5, "host": 5, "httpentiti": 5, "one": 5, "null": 5, "requestent": 5, "restmethod": 5, "chang": 5, "37": 5, "modul": 5, "selenium": 5, "bridge_class": 5, "bridg": 5, "end": 5, "print_pag": 5, "option": 5, "page_rang": 5, "arrai": 5, "driver": 5, "firefox": 5, "webdriv": 5, "extra": 5, "line": 5, "de": 5, "2": 5, "indent": 5, "an": 5, "expr": 5, "1127": 5, "1134": 5, "registri": 5, "eslint": 5, "stylish": 5, "applyto": 5, "applytokind": 5, "alldocu": 5, "fileloc": 5, "filelocationkind": 5, "absolut": 5, "pattern": 5, "defaultpattern": 5, "No": 5, "newlin": 5, "typescript": 5, "rel": 5, "fileprefix": 5, "cwd": 5, "am": 5, "sure": 5, "good": 5, "idea": 5, "make": 5, "me": 5, "wonder": 5, "shoul": 5, "46": 5, "xml": 5, "version": 5, "encod": 5, "utf": 5, "compon": 5, "desktop": 5, "id": 5, "metadata_licens": 5, "cc": 5, "BY": 5, "sa": 5, "3": 5, "project_licens": 5, "mit": 5, "name_long": 5, "homepag": 5, "visualstudio": 5, "com": 5, "summari": 5, "editor": 5, "develop": 5, "support": 5, "integr": 5, "exist": 5, "tool": 5, "descript": 5, "visual": 5, "studio": 5, "lightweight": 5, "power": 5, "which": 5, "na": 5, "onli": 5, "171": 5, "export": 5, "distinct": 5, "keyfn": 5, "fn": 5, "item": 5, "boolean": 5, "notfoundvalu": 5, "element": 5, "wouldn": 5, "clearer": 5, "iworkspacecontextservic": 5, "vs": 5, "platform": 5, "workspac": 5, "common": 5, "iworkbenchcontribut": 5, "iworkbenchcontributionsregistri": 5, "extens": 5, "workbenchextens": 5, "workbench": 5, "contribut": 5, 
"iworkbenchcontrib": 5, "somewher": 5, "74": 5, "inexteditorgroupsaccessor": 5, "getgroup": 5, "identifi": 5, "groupidentifi": 5, "inexteditorgroupview": 5, "addgroup": 5, "locat": 5, "direct": 5, "groupdirect": 5, "iaddgroupopt": 5, "inexteditorgroup": 5, "removegroup": 5, "void": 5, "178": 5, "181": 5, "abstract": 5, "abstractdebugtest": 5, "codegentestcas": 5, "var": 5, "inboxmethod": 5, "vmloop": 5, "while": 5, "val": 5, "eventset": 5, "virtualmachin": 5, "eventqueu": 5, "fix": 5, "bug": 5, "describ": 5, "th": 5, "kara": 5, "intern": 5, "typo": 5, "descr": 5, "word": 5, "somth": 5, "fun": 5, "wrte": 5, "childen": 5, "unused_vari": 5, "variabl": 5, "come": 5, "never": 5, "children": 5, "ignore_backend": 5, "jvm": 5, "kotlin_configuration_flag": 5, "assertions_mod": 5, "outer": 5, "inner": 5, "assert": 5, "statu": 5, "base": 5, "top": 5, "level": 5, "ldc": 5, "louter": 5, "clinit": 5, "invokevirtu": 5, "java": 5, "lang": 5, "desiredassertionstatu": 5, "z": 5, "putstat": 5, "assertionsdis": 5, "getstat": 5, "Is": 5, "reason": 5, "doesn": 5, "src": 5, "247": 5, "245": 5, "indiceshandl": 5, "protect": 5, "context": 5, "commonbackendcont": 5, "arrayindiceshandl": 5, "commonbackendcontext": 5, "arraybackend": 5, "17": 5, "diagnost": 5, "unused_paramet": 5, "overloads_priv": 5, "overload": 5, "foo": 5, "ok": 5, "d": 5, "object": 5, "shouldn": 5, "expecterror": 5, "expecttyp": 5, "tsd": 5, "defaultstor": 5, "store": 5, "default": 5, "uiplugin": 5, "successrespons": 5, "uppyfil": 5, "uploadeduppyfil": 5, "faileduppyfil": 5, "pluginopt": 5, "ex": 5, "what": 5, "1076": 5, "currentprogress": 5, "getfil": 5, "progress": 5, "setfilest": 5, "assign": 5, "postprocess": 5, "postprocessor": 5, "getpostprocessor": 5, "w": 5, "681": 5, "tu": 5, "extend": 5, "plugin": 5, "reset": 5, "handleresetprogress": 5, "autoretri": 5, "log": 5, "mai": 5, "2347": 5, "altern": 5, "b": 5, "broken": 5, "142": 5, "dragdrop": 5, "restrict": 5, "input": 5, "matter": 5, "prefix": 5, "ar_sa": 5, "\u0627\u0644\u062a\u0634\u0641\u064a\u0631": 5, "entercorrecturl": 5, "\u062e\u0637\u0623": 5, "\u0641\u064a": 5, "\u0627\u0644\u0631\u0627\u0628\u0637": 5, "\u0627\u0631\u062c\u0648": 5, "\u0627\u0644\u062a\u0623\u0643\u062f": 5, "\u0645\u0646": 5, "\u0627\u062f\u062e\u0627\u0644": 5, "\u0631\u0627\u0628\u0637": 5, "\u0645\u0628\u0627\u0634\u0631": 5, "\u0644\u0644\u0645\u0644\u0641": 5, "enterurltoimport": 5, "\u0627\u062f\u062e\u0644": 5, "\u0644\u0627\u0633\u062a\u064a\u0631\u0627\u062f": 5, "\u0627\u0644\u0645\u0644\u0641\u0627\u062a": 5, "exceedss": 5, "\u0627\u0644\u0645\u0644\u0641": 5, "\u0627\u0643\u0628\u0631": 5, "\u0627\u0644\u062d\u062c\u0645": 5, "\u0627\u0644\u0645\u0633\u0645\u0648\u062d": 5, "size": 5, "As": 5, "produc": 5, "better": 5, "than": 5, "much": 5, "insight": 5, "detail": 5, "tend": 5, "specif": 5, "thing": 5, "weird": 5, "codereview": 5, "sometim": 5, "cut": 5, "shown": 5, "due": 5, "sentenc": 5, "pretti": 5, "high": 5, "unfortun": 5, "up": 5, "futur": 5, "figur": 5, "behind": 5, "calcul": 5, "score": 5, "measur": 5, "higher": 5, "10169": 5, "25": 5, "71": 5, "91": 5, "84": 5, "perform": 5, "all": 5, "nevertheless": 5, "still": 5, "low": 5, "mean": 5, "hard": 5, "semant": 5}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"codereview": [0, 2, 4], "ml": [0, 2], "perform": [0, 2], "tabl": 2, "content": 2, "bibliographi": 2, "collect": 3, "code": 3, "review": 3, "data": [3, 4], "model": 4, "infer": 4, "1": 4, "token": 4, "dataset": 4, "2": 4, "load": 4, "3": 4, "predict": [4, 5], "function": 4, 
"huggingfac": 4, "pre": 4, "train": 4, "checkpoint": 4, "fine": 4, "tune": 4, "evalu": 5, "qualit": 5, "quantit": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file