diff --git a/_sources/notebooks/2_inference.ipynb b/_sources/notebooks/2_inference.ipynb index 064f71a..38567f0 100644 --- a/_sources/notebooks/2_inference.ipynb +++ b/_sources/notebooks/2_inference.ipynb @@ -15,21 +15,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 57, "id": "initial_id", "metadata": { "collapsed": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -58,24 +49,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 58, "id": "ad4d16d13804be69", "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]\n", - "Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]\n", - "Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]\n", - "Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]\n", - "Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]\n" - ] - } - ], + "outputs": [], "source": [ "# download tokenizer from huggingface\n", "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/codereviewer\")\n", @@ -86,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 59, "id": "cb003d6d8f578da1", "metadata": { "collapsed": false @@ -120,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 60, "id": "d06f51b2150c61c4", "metadata": { "collapsed": false @@ -167,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 61, "id": "7a5b97449733bbc6", "metadata": { "collapsed": false @@ -192,11 +171,11 @@ " )\n", " # decode the predictions\n", " preds_np = preds.detach().cpu().numpy()\n", - " preds_decoded = np.apply_along_axis(lambda row: tokenizer.decode(\n", - " row[2:], skip_special_tokens=True, clean_up_tokenization_spaces=False\n", - " ), 1, preds_np)\n", + " preds_decoded = [tokenizer.decode(row[2:],\n", + " skip_special_tokens=True,\n", + " clean_up_tokenization_spaces=False) for row in preds_np]\n", " # add the decoded predictions to the result\n", - " result += list(preds_decoded)\n", + " result += preds_decoded\n", " return result" ] }, @@ -215,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 62, "id": "c508661efcdcad40", "metadata": { "collapsed": false @@ -225,13 +204,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 478kB/s]\n", - "Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:02<00:00, 306MB/s] \n", - "Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 36.7kB/s]\n", - "100%|██████████| 636/636 [11:08<00:00, 1.05s/it]\n", - "100%|██████████| 63/63 [03:31<00:00, 3.35s/it]\n", - "100%|██████████| 63/63 [01:57<00:00, 1.87s/it]\n", - "100%|██████████| 63/63 [02:39<00:00, 2.53s/it]\n" + "100%|██████████| 636/636 [11:27<00:00, 1.08s/it]\n", + "100%|██████████| 63/63 [03:37<00:00, 3.45s/it]\n", + "100%|██████████| 63/63 [02:01<00:00, 
1.93s/it]\n", + "100%|██████████| 63/63 [02:46<00:00, 2.64s/it]\n" ] } ], @@ -270,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 63, "id": "851255e54c49484a", "metadata": { "collapsed": false @@ -280,16 +256,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]\n", - "Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] \n", - "Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']\n", + "Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.weight', 'cls_head.bias']\n", "- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]\n", - "100%|██████████| 636/636 [15:58<00:00, 1.51s/it]\n", - "100%|██████████| 63/63 [01:41<00:00, 1.61s/it]\n", - "100%|██████████| 63/63 [01:32<00:00, 1.47s/it]\n", - "100%|██████████| 63/63 [01:27<00:00, 1.39s/it]\n" + "100%|██████████| 636/636 [15:51<00:00, 1.50s/it]\n", + "100%|██████████| 63/63 [01:40<00:00, 1.59s/it]\n", + "100%|██████████| 63/63 [01:32<00:00, 1.48s/it]\n", + "100%|██████████| 63/63 [01:26<00:00, 1.38s/it]\n" ] } ], diff --git a/_sources/notebooks/3_evaluation.ipynb b/_sources/notebooks/3_evaluation.ipynb index 42de24c..430ac28 100644 --- a/_sources/notebooks/3_evaluation.ipynb +++ b/_sources/notebooks/3_evaluation.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "bc60e7255799cccf", "metadata": { @@ -12,21 +13,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 13, "id": "initial_id", "metadata": { "collapsed": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "import numpy as np\n", @@ -37,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 14, "id": "76e1f5dc15fd5625", "metadata": { "collapsed": false @@ -51,21 +43,25 @@ " hf_preds = pd.read_csv(hf_preds_file)\n", " fine_tuned = pd.read_csv(fine_tuned_file)\n", " # put in df\n", - " df = pd.DataFrame({'code': fine_tuned['code'], 'hf_pred': hf_preds['prediction'], 'fine_tuned_pred': fine_tuned['prediction']})\n", + " df = pd.DataFrame({'code': fine_tuned['code'],\n", + " 'hf_pred': hf_preds['prediction'],\n", + " 'fine_tuned_pred': fine_tuned['prediction'],\n", + " 'target': fine_tuned['target']})\n", " df.replace(np.nan, '', regex=True)\n", " # print sample with predictions\n", - " sample = df.sample(sample_size)\n", - " for code, hf_pred, fine_tuned_pred in sample.to_numpy():\n", + " sample = df.sample(sample_size, random_state=42)\n", + " for code, hf_pred, fine_tuned_pred, target in sample.to_numpy():\n", " print('-------------------')\n", " print(code)\n", - " print(f'HF Pred: {hf_pred}')\n", - " print(f'Fine Tuned Pred: {fine_tuned_pred}')\n", + " print(f'## Human Review: {target}')\n", + " print(f'## HF Pred: {hf_pred}')\n", + " print(f'## Fine Tuned Pred: {fine_tuned_pred}')\n", " return df" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "id": "1f5e6defa2df6726", "metadata": { "collapsed": false @@ -99,6 +95,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3698532ffeff76b4", "metadata": { @@ -113,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, "id": "642eedcac45ee834", "metadata": { "collapsed": false @@ -125,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "id": "30dc56606146a2d6", "metadata": { "collapsed": false @@ -136,81 +133,77 @@ "output_type": "stream", "text": [ "-------------------\n", - "@@ -20,10 +20,6 @@ from pylint import checkers, interfaces\n", - " from pylint.checkers import utils\n", + "@@ -1,6 +1,7 @@\n", + " # frozen_string_literal: true\n", "\n", + " require 'hocon'\n", + "+require 'bolt/error'\n", "\n", - "-def _is_constant_empty_str(node):\n", - "- return isinstance(node, nodes.Const) and node.value == \"\"\n", - "-\n", - "-\n", - " class CompareToEmptyStringChecker(checkers.BaseChecker):\n", - " \"\"\"Checks for comparisons to empty string.\n", - " Most of the times you should use the fact that empty strings are false.\n", - "HF Pred: Why is this removed?\n", - "Fine Tuned Pred: Removing this helper function because it isn't used anywhere else and `_is_constant_empty_str` is used in other places.\n", + " class TransportConfig\n", + " attr_accessor :host, :port, :ssl_cert, :ssl_key, :ssl_ca_cert, :ssl_cipher_suites,\n", + "## Human Review: Looks like this isn't used in this file?\n", + "## HF Pred: Why do we need this?\n", + "## Fine Tuned Pred: I am surprised you don't use `hocon` here.\n", "-------------------\n", - "@@ -14,7 +14,7 @@ import net.sourceforge.pmd.RuleViolation;\n", - " /**\n", - " * A {@link RuleViolation} implementation that is immutable, and therefore cache friendly\n", - " */\n", - "-public final class CachedRuleViolation implements RuleViolation {\n", - "+public class CachedRuleViolation implements RuleViolation {\n", + "@@ -92,7 +92,7 @@ public final class OAuth2AuthorizedClientArgumentResolver 
implements HandlerMeth\n", + " \t\t\t\t\t\t\tclientRegistrationId.flatMap(id -> Mono.error(new IllegalStateException(\n", + " \t\t\t\t\t\t\t\t\t\"Unable to resolve the Authorized Client with registration identifier \\\"\"\n", + " \t\t\t\t\t\t\t\t\t\t\t+ id\n", + "-\t\t\t\t\t\t\t\t\t\t\t+ \"\\\". An \\\"authenticated\\\" or \\\"unauthenticated\\\" session is required. To allow for unauthenticated access, ensure ServerHttpSecurity.anonymous() is configured.\"))))\n", + "+\t\t\t\t\t\t\t\t\t\t\t+ \"\\\". An \\\"authenticated\\\" or \\\"anonymous\\\" request is required. To allow for anonymous access, ensure ServerHttpSecurity.anonymous() is configured.\"))))\n", + " \t\t\t\t\t.flatMap(zipped -> {\n", + " \t\t\t\t\t\tString registrationId = zipped.getT1();\n", + " \t\t\t\t\t\tString username = zipped.getT2();\n", + "## Human Review: I don't think this should be changed since on the reactive side we don't support anonymous users.\n", + "## HF Pred: I don't think we should change this message. It's not clear to me why this is an error.\n", + "## Fine Tuned Pred: I think this message should be updated. `An \"anonymous\" request is required. To allow for anonymous access, ServerHttpSecurity.anonymous() is configured.`\n", + "-------------------\n", + "@@ -3,7 +3,7 @@ const test = require('./shared').assert,\n", + " setupDatabase = require('./shared').setupDatabase,\n", + " Script = require('vm'),\n", + " expect = require('chai').expect,\n", + "- normalizedFunctionString = require('bson/lib/bson/parser/utils').normalizedFunctionString,\n", + "+ normalizedFunctionString = require('bson/lib/parser/utils').normalizedFunctionString,\n", + " Buffer = require('safe-buffer').Buffer;\n", "\n", - " private final CachedRuleMapper mapper;\n", - " \n", - "HF Pred: Why did you remove `final`\n", - "Fine Tuned Pred: Why did you remove the final modif\n", + " const {\n", + "## Human Review: feel free when editing sections like this to introduce modern features like object destructuring.\n", + "## HF Pred: Why this change?\n", + "## Fine Tuned Pred: whats with the whitespace changes in this file?\n", "-------------------\n", - "@@ -225,6 +225,18 @@ class BokehPlot(DimensionedPlot):\n", - " source.data.update(converted_data)\n", - " else:\n", - " source.stream(data, stream.length)\n", - "+ return\n", + "@@ -0,0 +1,17 @@\n", + "+\n", - "+\n", - "+\t@@NAME@@.desktop\n", - "+\tCC-BY-SA-3.0\n", - "+\tMIT\n", - "+\t@@NAME_LONG@@\n", - "+\thttps://code.visualstudio.com\n", - "+\tCode editor for developers supporting integration with existing tools\n", - "+\t\n", - "+\t\t

\n", - "+\t\t\tVisual Studio Code is a lightweight but powerful source code editor which\n", - "HF Pred: I think this should be `@@NA\n", - "Fine Tuned Pred: We don't need this file, it's only \n", + "@@ -340,6 +341,17 @@ export class DebugEditorContribution implements IDebugEditorContribution {\n", + " \t\treturn new RunOnceScheduler(() => this.hoverWidget.hide(), HOVER_DELAY);\n", + " \t}\n", + " \n", + "+\t@memoize\n", + "+\tprivate get provideNonDebugHoverScheduler(): RunOnceScheduler {\n", + "+\t\treturn new RunOnceScheduler(() => {\n", + "## Human Review: Where is this disposed?\n", + "## HF Pred: Why is this memoized?\n", + "## Fine Tuned Pred: This is not `Runnable` but `RunOnceScheduler`, right?\n", "-------------------\n", - "@@ -171,7 +171,7 @@ export function distinct(array: T[], keyFn?: (t: T) => string): T[] {\n", - " }\n", + "@@ -53,6 +53,11 @@ export class SassParser extends cssParser.Parser {\n", " \n", - " export function first(array: T[], fn: (item: T) => boolean, notFoundValue: T = null): T {\n", - "-\tfor (let i = 0; i < array.length; i++) {\n", - "+\tfor (let i = 0, I = array.length; i < I; i++) {\n", - " \t\tconst element = array[i];\n", - "HF Pred: I think this should be `array.length - 1`\n", - "Fine Tuned Pred: Wouldn't it be clearer to name this `array`?\n", + " \t// Sass variables: $font-size: 12px;\n", + " \tpublic _parseVariableDeclaration(panic:scanner.TokenType[]=[]): nodes.VariableDeclaration {\n", + "+\t\tvar cssVariableDeclaration= super._parseCssVariableDeclaration(panic);\n", + "## Human Review: That looks wrong. Not all places where you can declare a sass variable are also suited to declare a css variable.\n", + "\n", + "## HF Pred: Nit: space after `=`\n", + "## Fine Tuned Pred: Minor style issue: missing space before the `=`\n", "-------------------\n", - "@@ -18,7 +18,6 @@ import { IWorkspaceContextService } from 'vs/platform/workspace/common/workspace\n", - " import { IWorkbenchContribution, IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions } from 'vs/workbench/common/contributions';\n", - "HF Pred: import { IWorkbenchContrib\n", - "Fine Tuned Pred: Are these imports used somewhere?\n", + "@@ -537,6 +537,23 @@ export interface CompletionList {\n", + " \t */\n", + " \titems: CompletionItem[];\n", + " }\n", + "+\n", + "+/**\n", + "+ * Contains additional information about the context in which\n", + "+ * [completion provider](#CompletionItemProvider.provideCompletionItems) is triggered.\n", + "+ */\n", + "+export interface CompletionContext {\n", + "## Human Review: This also needs the following information *manual* invocation, *24x7* IntelliSense, completing incomplete result set \n", + "## HF Pred: I don't think we need this interface. 
We can just use `CompletionItemProvider`.\n", + "## Fine Tuned Pred: Is there a reason this is not an object?\n", "-------------------\n", - "@@ -74,6 +74,7 @@ export interface INextEditorGroupsAccessor {\n", - " \tgetGroup(identifier: GroupIdentifier): INextEditorGroupView;\n", + "@@ -439,6 +441,16 @@ export class DebugService implements debug.IDebugService {\n", + " \t\t});\n", + " \t}\n", " \n", - " \taddGroup(location: INextEditorGroupView | GroupIdentifier, direction: GroupDirection, options?: IAddGroupOptions): INextEditorGroup;\n", - "+ removeGroup(): void;\n", - "HF Pred: removeGroup(identifier: GroupIdentifier);\n", - "Fine Tuned Pred: isn't this a breaking change?\n" + "+\tprivate debouncedDisplayThreads(session: RawDebugSession) {\n", + "+\t\tconst timer = this.displayThreadsTimer.get(session.getId());\n", + "+\t\tif (timer) {\n", + "+\t\t\tclearTimeout(timer);\n", + "+\t\t}\n", + "+\t\tthis.displayThreadsTimer.set(session.getId(), setTimeout(() => {\n", + "+\t\t\tthis.fetchThreads(session).done(undefined, errors.onUnexpectedError);\n", + "## Human Review: This will make \n", + "## HF Pred: I'm not sure if this is the best way to do this. I think it would be better to do this in the `fetchThreads` function.\n", + "## Fine Tuned Pred: Is there a reason we can't just use `clearTimeout` here? Is there any difference in the code between `clearTimeout` and `set`?\n", + "-------------------\n", + "@@ -265,4 +266,33 @@ class MDDocumentContentProvider implements TextDocumentContentProvider {\n", + " \t\t\t}, 300);\n", + " \t\t}\n", + " \t}\n", + "+}\n", + "+\n", + "+class DocumentHeadingsProvider implements vscode.DocumentSymbolProvider {\n", + "+\n", + "+\t// http://daringfireball.net/projects/markdown/syntax#header\n", + "+\tprivate static _atxPattern = /^(#){1,6}\\s+.+(\\s+\\1)?/;\n", + "+\tprivate static _settext = /^\\s*[-=]+\\s*$/;\n", + "## Human Review: Make sure to also test that this does not pick up separators\n", + "\n", + "## HF Pred: nit: `_atxPattern` -> `_atxRegex`\n", + "## Fine Tuned Pred: Remove this and use directly the `SymbolProvider` below.\n" ] } ], @@ -293,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 19, "id": "d6f7b55edbe73e06", "metadata": { "collapsed": false @@ -304,71 +309,70 @@ "output_type": "stream", "text": [ "-------------------\n", - "@@ -178,7 +181,7 @@ abstract class AbstractDebugTest : CodegenTestCase() {\n", - " var inBoxMethod = false\n", - " vmLoop@\n", - " while (true) {\n", - "- val eventSet = virtualMachine.eventQueue().remove(1000)\n", - "+ val eventSet = virtualMachine.eventQueue().remove(1000) ?: continue\n", - "HF Pred: Why is this change needed?\n", - "Fine Tuned Pred: This is the fix for the bug described in th\n", - "-------------------\n", - "@@ -3,8 +3,8 @@ package kara.internal\n", - " /* Test somthing */\n", - " class MoreeFun {\n", - " fun wrte() {\n", - "- val childen = 12\n", - "- val come = childen\n", - "+ val children = 12\n", - "HF Pred: Why did you remove this\n", - "Fine Tuned Pred: I think this should be a warning instead of a warning.\n", + "@@ -24,10 +24,13 @@ abstract class AbstractIrLineNumberTest : AbstractLineNumberTest() {\n", + " \n", + " override fun compareCustom(psiFile: KtFile, wholeFile: File) {\n", + " val fileText = psiFile.text\n", + "- val expectedLineNumbers = normalize(\n", + "- fileText.substring(fileText.indexOf(\"//\") + 2)\n", + "- .trim().split(\" \").map { it.trim() }.toMutableList()\n", + "- )\n", + "+ val expectedLineNumbers = 
fileText.split(\"\\n\".toRegex()).filter { line ->\n", + "## Human Review: How about simplifying this to:\n", + "\n", + " val expectedLineNumbers = normalize(\n", + " fileText.substring(Regex(\"// \\\\d+\").find(fileText)!!.range.start + 2)\n", + " .trim().split(\" \").map { it.trim() }.toMutableList()\n", + " )\n", + "\n", + "Then we are looking for a line that starts with \"// \" with a number of digits after it. That should work to exclude the comment lines with text. That looks consistent with what the old backend expects from these tests.\n", + "## HF Pred: Why did you change this?\n", + "## Fine Tuned Pred: Remove the `toRegex()` call here.\n", "-------------------\n", - "@@ -0,0 +1,16 @@\n", - "+// IGNORE_BACKEND: JVM\n", - "+// KOTLIN_CONFIGURATION_FLAGS: ASSERTIONS_MODE=jvm\n", - "+\n", - "+class Outer {\n", - "+ class Inner {\n", - "+ fun f() { assert(true) }\n", - "+ }\n", - "+}\n", - "+\n", - "+// We set the assertion status based on top-level classes.\n", - "+// 0 LDC LOuter\\$Inner;.class\n", - "+// Outer\\$Inner.:\n", - "+// 1 LDC LOuter;.class\\s*INVOKEVIRTUAL java/lang/Class.desiredAssertionStatus \\(\\)Z\n", - "+// 1 PUTSTATIC Outer\\$Inner.\\$assertionsDisabled : Z\n", - "+// Outer\\$Inner.f:\n", - "+// 1 GETSTATIC Outer\\$Inner.\\$assertionsDisabled\n", - "HF Pred: I don't think this is the right place to put this code\n", - "Fine Tuned Pred: Is there a reason why this doesn't go in `src/\n", + "@@ -45,7 +45,6 @@ import org.jetbrains.kotlin.types.KotlinType\n", + " import org.jetbrains.kotlin.types.typeUtil.isPrimitiveNumberType\n", + " import org.jetbrains.kotlin.types.typeUtil.makeNotNullable\n", + " import org.jetbrains.kotlin.types.typeUtil.makeNullable\n", + "-import java.lang.AssertionError\n", + "## Human Review: Pls remove this file from pr\n", + "## HF Pred: import org.jetbrains.kotlin.util.KotlinConstants;\n", + "## Fine Tuned Pred: Why did you remove this import?\n", "-------------------\n", - "@@ -247,7 +245,7 @@ internal abstract class IndicesHandler(protected val context: CommonBackendConte\n", + "@@ -46,6 +46,7 @@ abstract class AbstractCheckLocalVariablesTableTest : CodegenTestCase() {\n", + " assertNotNull(\"Couldn't find class file for pattern $classFileRegex in: $pathsString\", outputFile)\n", " \n", - " internal class ArrayIndicesHandler(context: CommonBackendContext) : IndicesHandler(context) {\n", - "HF Pred: }\n", - "Fine Tuned Pred: Should be `ArrayBackend` instead of `CommonBackendContext`.\n", + " val actualLocalVariables = readLocalVariable(ClassReader(outputFile.asByteArray()), methodName)\n", + "+ checkLocalVariableTypes(ClassReader(outputFile.asByteArray()), methodName, actualLocalVariables)\n", + "## Human Review: Could we reuse class reader from previous line?\n", + "## HF Pred: assertEquals(expectedLocalVariables, actualLocalVariables)\n", + "## Fine Tuned Pred: What's the purpose of this line?\n", "-------------------\n", - "@@ -0,0 +1,17 @@\n", - "+// !DIAGNOSTICS: -UNUSED_PARAMETER\n", - "+class C {\n", - "+ [kotlin.jvm.overloads] private fun foo(s: String = \"OK\") {\n", - "+ }\n", - "+}\n", - "+\n", - "+fun foo() {\n", - "+ class D {\n", - "+ [kotlin.jvm.overloads] fun foo(s: String = \"OK\") {\n", - "+ }\n", - "+ }\n", + "@@ -37,3 +37,17 @@ public fun Char.equals(other: Char, ignoreCase: Boolean = false): Boolean {\n", + " * Returns `true` if this character is a Unicode surrogate code unit.\n", + " */\n", + " public fun Char.isSurrogate(): Boolean = this in Char.MIN_SURROGATE..Char.MAX_SURROGATE\n", "+\n", - "+ val x = object {\n", 
- "+ [kotlin.jvm.overloads] fun foo(s: String = \"OK\") {\n", - "+ }\n", - "+ }\n", - "+}\n", - "HF Pred: I think we can remove `` here.\n", - "Fine Tuned Pred: Shouldn't this be `-UNUSED_PARAMETER`?\n" + "+/**\n", + "+ * Minimum value for character\n", + "+ * @see \n", + "## Human Review: I don't think this way of linking is supported in `@see` tag. I'm not sure what is the purpose of these links anyway.\n", + "## HF Pred: This should be `Char.isSurrogate()`.\n", + "## Fine Tuned Pred: Please remove the `` tag.\n", + "-------------------\n", + "@@ -175,7 +195,9 @@ class ExpressionCodegen(\n", + " \n", + " override fun visitBlockBody(body: IrBlockBody, data: BlockInfo): StackValue {\n", + " return body.statements.fold(none()) { _, exp ->\n", + "- exp.accept(this, data)\n", + "+ val result = exp.accept(this, data)\n", + "+ (exp as? IrExpression)?.markEndOfStatementIfNeeded()\n", + "## Human Review: ```\n", + "exp.accept(this, data).also {\n", + " (exp as? IrExpression)?.markEndOfStatementIfNeeded()\n", + "}\n", + "```\n", + "## HF Pred: Why is this needed?\n", + "## Fine Tuned Pred: Unrelated to this PR, but this doesn't seem like the best place to markEndOfStatementIfNeeded. Should the markEndOfStatementIfNeeded be in some other place, or should the markEndOfStatementIfNeeded be in BlockInfoResolver?\n" ] } ], @@ -378,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 20, "id": "b3a70e469063ec64", "metadata": { "collapsed": false @@ -389,47 +393,63 @@ "output_type": "stream", "text": [ "-------------------\n", - "@@ -1,6 +1,6 @@\n", - " import { expectError, expectType } from 'tsd'\n", - " import DefaultStore from '@uppy/store-default'\n", - "-import Uppy, { UIPlugin } from '..'\n", - "+import Uppy, { SuccessResponse, UIPlugin, UppyFile } from '..'\n", - " import type { UploadedUppyFile, FailedUppyFile, PluginOptions } from '..'\n", - "HF Pred: Why do we need to ex\n", - "Fine Tuned Pred: What's the reason for this change?\n", + "@@ -28,6 +28,7 @@\n", + " \"@uppy/thumbnail-generator\": \"0.29.1\",\n", + " \"@uppy/utils\": \"0.29.1\",\n", + " \"classnames\": \"^2.2.6\",\n", + "+ \"cuid\": \"^2.1.1\",\n", + " \"drag-drop\": \"2.13.3\",\n", + "## Human Review: In other places we are using uuid, I think, is there a reason to use different packages? 
I was actually gonna suggest we switch to https://github.com/ai/nanoid everywhere, it’s smaller and ids are smaller.\n", + "## HF Pred: This should be `^2.1.0`, right?\n", + "## Fine Tuned Pred: Why are we using a fixed version here?\n", "-------------------\n", - "@@ -1076,6 +1076,7 @@ class Uppy {\n", - " const currentProgress = this.getFile(file.id).progress\n", - " this.setFileState(file.id, {\n", - " progress: Object.assign({}, currentProgress, {\n", - "+ postprocess: this.postProcessors.length > 0,\n", - "HF Pred: postprocess: this.postProcessors.length > 0,\n", - "Fine Tuned Pred: Why don't we use `getPostProcessors` here as w\n", + "@@ -49,7 +49,8 @@ class Tabs extends Component {\n", + " tabindex=\"-1\"\n", + " type=\"file\"\n", + " name=\"files[]\"\n", + "- multiple=\"true\"\n", + "+ multiple={this.props.maxNumberOfFiles !== 1 || !this.props.maxNumberOfFiles}\n", + "## Human Review: same here\n", + "## HF Pred: `multiple={this.props.maxNumberOfFiles !== 1}`\n", + "## Fine Tuned Pred: shouldnt it be only true if `this.props.maxNumberOfFiles` is defined ?\n", "-------------------\n", - "@@ -681,6 +681,8 @@ module.exports = class Tus extends Plugin {\n", - " this.uppy.on('reset-progress', this.handleResetProgress)\n", + "@@ -496,6 +494,25 @@ module.exports = class ProviderView {\n", + " }\n", + " }\n", " \n", - " if (this.opts.autoRetry) {\n", - "+ this.uppy.log('[Tus] The `autoRetry` option may be removed in Uppy 2.0. See https://github.com/transloadit/uppy/pull/2347 for alternatives.', 'warning')\n", - "HF Pred: this.log('[Tus] The `autoRetry` option may b\n", - "Fine Tuned Pred: This link is broken right now.\n", + "+ listAllFiles (path, files = null) {\n", + "+ files = files || []\n", + "+ return new Promise((resolve) => {\n", + "+ this.provider.list(path).then((res) => {\n", + "## Human Review: should we add a `catch` for this call. Otherwise what will happen when the call to `this.provider.list` fails?\n", + "## HF Pred: `files = files || []`\n", + "## Fine Tuned Pred: `resolve` is unnecessary here, you can just call `this.provider.list(path, files)`\n", "-------------------\n", - "@@ -142,6 +142,7 @@ module.exports = class DragDrop extends Plugin {\n", - " const restrictions = this.uppy.opts.restrictions\n", - " return (\n", - " CodeReviewer Model Inference -

-

1 Tokenizers and Datasets

@@ -382,15 +376,6 @@

1 Tokenizers and Datasets -
Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]
-Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]
-Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]
-Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]
-Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]
-
-
-
@@ -487,13 +472,10 @@

HuggingFace pre-trained checkpoint -
-
Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]
-Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] 
-Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']
+
Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.weight', 'cls_head.bias']
 - This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
 - This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]
-100%|██████████| 636/636 [15:58<00:00,  1.51s/it]
-100%|██████████| 63/63 [01:41<00:00,  1.61s/it]
-100%|██████████| 63/63 [01:32<00:00,  1.47s/it]
-100%|██████████| 63/63 [01:27<00:00,  1.39s/it]
+100%|██████████| 636/636 [15:51<00:00,  1.50s/it]
+100%|██████████| 63/63 [01:40<00:00,  1.59s/it]
+100%|██████████| 63/63 [01:32<00:00,  1.48s/it]
+100%|██████████| 63/63 [01:26<00:00,  1.38s/it]
 
diff --git a/notebooks/3_evaluation.html b/notebooks/3_evaluation.html index 443c9ff..8c4ba67 100644 --- a/notebooks/3_evaluation.html +++ b/notebooks/3_evaluation.html @@ -350,12 +350,6 @@

Predictions Evaluation

-
-
/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
-  from .autonotebook import tqdm as notebook_tqdm
-
-
-
@@ -429,81 +427,77 @@

Qualitative Evaluation
-------------------
-@@ -20,10 +20,6 @@ from pylint import checkers, interfaces
- from pylint.checkers import utils
-
-
--def _is_constant_empty_str(node):
--    return isinstance(node, nodes.Const) and node.value == ""
--
--
- class CompareToEmptyStringChecker(checkers.BaseChecker):
-     """Checks for comparisons to empty string.
-     Most of the times you should use the fact that empty strings are false.
-HF Pred: Why is this removed?
-Fine Tuned Pred: Removing this helper function because it isn't used anywhere else and `_is_constant_empty_str` is used in other places.
--------------------
-@@ -14,7 +14,7 @@ import net.sourceforge.pmd.RuleViolation;
- /**
-  * A {@link RuleViolation} implementation that is immutable, and therefore cache friendly
-  */
--public final class CachedRuleViolation implements RuleViolation {
-+public class CachedRuleViolation implements RuleViolation {
+@@ -1,6 +1,7 @@
+ # frozen_string_literal: true
 
-     private final CachedRuleMapper mapper;
- 
-HF Pred: Why did you remove `final`
-Fine Tuned Pred: Why did you remove the final modif
+ require 'hocon'
++require 'bolt/error'
+
+ class TransportConfig
+   attr_accessor :host, :port, :ssl_cert, :ssl_key, :ssl_ca_cert, :ssl_cipher_suites,
+## Human Review: Looks like this isn't used in this file?
+## HF Pred: Why do we need this?
+## Fine Tuned Pred: I am surprised you don't use `hocon` here.
 -------------------
-@@ -225,6 +225,18 @@ class BokehPlot(DimensionedPlot):
-                     source.data.update(converted_data)
-                 else:
-                     source.stream(data, stream.length)
-+            return
-+
-+        # Determine if the CDS.data requires a full update or simply needs
-+        # to be updated, this is done by determining whether newly added
-+        # or not updated columns have the same length
-+        new_length = [len(v) for v in data.values() if isinstance(v, (list, np.ndarray))]
-+        length = [len(v) for v in source.data.values() if isinstance(v, (list, np.ndarray))]
-+        not_updated = [k for k in source.data if k not in data]
-+        new = [k for k in data if k not in source.data]
-+        if ((not_updated and new_length and any(len(source.data[n]) != new_length[0] for n in not_updated))
-+            or (new and length and any(len(data[n]) != length[0] for n in new))):
-+            source.data = data
-         else:
-             source.data.update(data)
- 
-HF Pred: I don't think this is the right way to do this. If the length o
-Fine Tuned Pred: @Eric89GXL @mluessi do we need to check 
+@@ -92,7 +92,7 @@ public final class OAuth2AuthorizedClientArgumentResolver implements HandlerMeth
+ 							clientRegistrationId.flatMap(id -> Mono.error(new IllegalStateException(
+ 									"Unable to resolve the Authorized Client with registration identifier \""
+ 											+ id
+-											+ "\". An \"authenticated\" or \"unauthenticated\" session is required. To allow for unauthenticated access, ensure ServerHttpSecurity.anonymous() is configured."))))
++											+ "\". An \"authenticated\" or \"anonymous\" request is required. To allow for anonymous access, ensure ServerHttpSecurity.anonymous() is configured."))))
+ 					.flatMap(zipped -> {
+ 						String registrationId = zipped.getT1();
+ 						String username = zipped.getT2();
+## Human Review: I don't think this should be changed since on the reactive side we don't support anonymous users.
+## HF Pred: I don't think we should change this message. It's not clear to me why this is an error.
+## Fine Tuned Pred: I think this message should be updated. `An "anonymous" request is required. To allow for anonymous access, ServerHttpSecurity.anonymous() is configured.`
 -------------------
-@@ -128,7 +128,7 @@ public class RestRequest {
- 	 *
- 	 * @param method				the HTTP method for the request (GET/POST/DELETE etc)
- 	 * @param path					the URI path, this will automatically be resolved against the users current instance host.
--	 * @param httpEntity			the request body if there is one, can be null.
-+	 * @param requestEntity			the request body if there is one, can be null.
- 	 */
- 	public RestRequest(RestMethod method, String path, HttpEntity requestEntity) {
- 		this(method, path, requestEntity, null);
-HF Pred: Why did you change this?
-Fine Tuned Pred: This change should be
+@@ -3,7 +3,7 @@ const test = require('./shared').assert,
+   setupDatabase = require('./shared').setupDatabase,
+   Script = require('vm'),
+   expect = require('chai').expect,
+-  normalizedFunctionString = require('bson/lib/bson/parser/utils').normalizedFunctionString,
++  normalizedFunctionString = require('bson/lib/parser/utils').normalizedFunctionString,
+   Buffer = require('safe-buffer').Buffer;
+
+ const {
+## Human Review: feel free when editing sections like this to introduce modern features like object destructuring.
+## HF Pred: Why this change?
+## Fine Tuned Pred: whats with the whitespace changes in this file?
 -------------------
-@@ -37,6 +37,12 @@ module Selenium
-         def bridge_class
-           Bridge
-         end
+@@ -0,0 +1,17 @@
++<?php
++/**
++ * Copyright © Bold Brand Commerce Sp. z o.o. All rights reserved.
++ * See LICENSE.txt for license details.
++ */
 +
-+        def print_page(**options)
-+          options[:page_ranges] &&= Array(options[:page_ranges])
++declare(strict_types=1);
 +
-+          bridge.print_page(options)
-+        end
-       end # Driver
-     end # Firefox
-   end # WebDriver
-HF Pred: Extra empty line de
-Fine Tuned Pred: Use 2 (not 0) spaces for indenting an expre
++namespace Ergonode\Core\Infrastructure\Exception;
++
++class SerializationException extends SerializerException
++{
++    public function __construct(string $message, \Throwable $previous = null)
++    {
++        parent::__construct($message, $previous);
++    }
++}
+## Human Review: I think it should been in `SharedKernel` module.
+## HF Pred: Missing license header.
+## Fine Tuned Pred: Why do we need a separate exception class here? Seems very specific to Elgg\Core\Infrastructure\Exception
+-------------------
+@@ -473,6 +473,7 @@ describe('GridFS Stream', function () {
+                     // Fail if user tries to abort an aborted stream
+                     uploadStream.abort().then(null, function (error) {
+                       expect(error.toString()).to.equal(
++                        // TODO(NODE-3405): Replace with MongoStreamClosedError
+                         'MongoDriverError: Cannot call abort() on a stream twice'
+                       );
+                       client.close(done);
+## Human Review: You can remove these if they've been resolved in NODE-3405 and this isn't depending on it
+## HF Pred: Is this TODO still relevant?
+## Fine Tuned Pred: What is the problem here?
 
@@ -516,59 +510,71 @@

Qualitative Evaluation
-------------------
-@@ -1127,4 +1134,12 @@ registry.add('eslint-stylish', {
- 	applyTo: ApplyToKind.allDocuments,
- 	fileLocation: FileLocationKind.Absolute,
- 	pattern: defaultPattern('eslint-stylish')
--});
-\ No newline at end of file
-+});
-+
-+registry.add('go', {
-+	owner: 'typescript',
-+	applyTo: ApplyToKind.allDocuments,
-+	fileLocation: FileLocationKind.Relative,
-+	filePrefix: '${cwd}',
-HF Pred: Why is this needed?
-Fine Tuned Pred: I am not sure if this is a good idea. It makes me wonder if we shoul
--------------------
-@@ -0,0 +1,46 @@
-+<?xml version="1.0" encoding="UTF-8"?>
-+<component type="desktop">
-+	<id>@@NAME@@.desktop</id>
-+	<metadata_license>CC-BY-SA-3.0</metadata_license>
-+	<project_license>MIT</project_license>
-+	<name>@@NAME_LONG@@</name>
-+	<url type="homepage">https://code.visualstudio.com</url>
-+	<summary>Code editor for developers supporting integration with existing tools</summary>
-+	<description>
-+		<p>
-+			Visual Studio Code is a lightweight but powerful source code editor which
-HF Pred: I think this should be `@@NA
-Fine Tuned Pred: We don't need this file, it's only 
+@@ -340,6 +341,17 @@ export class DebugEditorContribution implements IDebugEditorContribution {
+ 		return new RunOnceScheduler(() => this.hoverWidget.hide(), HOVER_DELAY);
+ 	}
+ 
++	@memoize
++	private get provideNonDebugHoverScheduler(): RunOnceScheduler {
++		return new RunOnceScheduler(() => {
+## Human Review: Where is this disposed?
+## HF Pred: Why is this memoized?
+## Fine Tuned Pred: This is not `Runnable` but `RunOnceScheduler`, right?
 -------------------
-@@ -171,7 +171,7 @@ export function distinct<T>(array: T[], keyFn?: (t: T) => string): T[] {
- }
+@@ -53,6 +53,11 @@ export class SassParser extends cssParser.Parser {
  
- export function first<T>(array: T[], fn: (item: T) => boolean, notFoundValue: T = null): T {
--	for (let i = 0; i < array.length; i++) {
-+	for (let i = 0, I = array.length; i < I; i++) {
- 		const element = array[i];
-HF Pred: I think this should be `array.length - 1`
-Fine Tuned Pred: Wouldn't it be clearer to name this `array`?
+ 	// Sass variables: $font-size: 12px;
+ 	public _parseVariableDeclaration(panic:scanner.TokenType[]=[]): nodes.VariableDeclaration {
++		var cssVariableDeclaration= super._parseCssVariableDeclaration(panic);
+## Human Review: That looks wrong. Not all places where you can declare a sass variable are also suited to declare a css variable.
+
+## HF Pred: Nit: space after `=`
+## Fine Tuned Pred: Minor style issue: missing space before the `=`
 -------------------
-@@ -18,7 +18,6 @@ import { IWorkspaceContextService } from 'vs/platform/workspace/common/workspace
- import { IWorkbenchContribution, IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions } from 'vs/workbench/common/contributions';
-HF Pred: import { IWorkbenchContrib
-Fine Tuned Pred: Are these imports used somewhere?
+@@ -537,6 +537,23 @@ export interface CompletionList {
+ 	 */
+ 	items: CompletionItem[];
+ }
++
++/**
++ * Contains additional information about the context in which
++ * [completion provider](#CompletionItemProvider.provideCompletionItems) is triggered.
++ */
++export interface CompletionContext {
+## Human Review: This also needs the following information  *manual* invocation, *24x7* IntelliSense, completing incomplete result set 
+## HF Pred: I don't think we need this interface. We can just use `CompletionItemProvider`.
+## Fine Tuned Pred: Is there a reason this is not an object?
 -------------------
-@@ -74,6 +74,7 @@ export interface INextEditorGroupsAccessor {
- 	getGroup(identifier: GroupIdentifier): INextEditorGroupView;
+@@ -439,6 +441,16 @@ export class DebugService implements debug.IDebugService {
+ 		});
+ 	}
  
- 	addGroup(location: INextEditorGroupView | GroupIdentifier, direction: GroupDirection, options?: IAddGroupOptions): INextEditorGroup;
-+  removeGroup(): void;
-HF Pred: removeGroup(identifier: GroupIdentifier);
-Fine Tuned Pred: isn't this a breaking change?
++	private debouncedDisplayThreads(session: RawDebugSession) {
++		const timer = this.displayThreadsTimer.get(session.getId());
++		if (timer) {
++			clearTimeout(timer);
++		}
++		this.displayThreadsTimer.set(session.getId(), setTimeout(() => {
++			this.fetchThreads(session).done(undefined, errors.onUnexpectedError);
+## Human Review: This will make 
+## HF Pred: I'm not sure if this is the best way to do this. I think it would be better to do this in the `fetchThreads` function.
+## Fine Tuned Pred: Is there a reason we can't just use `clearTimeout` here? Is there any difference in the code between `clearTimeout` and `set`?
+-------------------
+@@ -265,4 +266,33 @@ class MDDocumentContentProvider implements TextDocumentContentProvider {
+ 			}, 300);
+ 		}
+ 	}
++}
++
++class DocumentHeadingsProvider implements vscode.DocumentSymbolProvider {
++
++	// http://daringfireball.net/projects/markdown/syntax#header
++	private static _atxPattern = /^(#){1,6}\s+.+(\s+\1)?/;
++	private static _settext = /^\s*[-=]+\s*$/;
+## Human Review: Make sure to also test that this does not pick up separators
+
+## HF Pred: nit: `_atxPattern` -> `_atxRegex`
+## Fine Tuned Pred: Remove this and use directly the `SymbolProvider` below.
 
@@ -581,71 +587,70 @@

Qualitative Evaluation
-------------------
-@@ -178,7 +181,7 @@ abstract class AbstractDebugTest : CodegenTestCase() {
-         var inBoxMethod = false
-         vmLoop@
-         while (true) {
--            val eventSet = virtualMachine.eventQueue().remove(1000)
-+            val eventSet = virtualMachine.eventQueue().remove(1000) ?: continue
-HF Pred: Why is this change needed?
-Fine Tuned Pred: This is the fix for the bug described in th
+@@ -24,10 +24,13 @@ abstract class AbstractIrLineNumberTest : AbstractLineNumberTest() {
+ 
+     override fun compareCustom(psiFile: KtFile, wholeFile: File) {
+         val fileText = psiFile.text
+-        val expectedLineNumbers = normalize(
+-            fileText.substring(fileText.indexOf("//") + 2)
+-                .trim().split(" ").map { it.trim() }.toMutableList()
+-        )
++        val expectedLineNumbers = fileText.split("\n".toRegex()).filter { line ->
+## Human Review: How about simplifying this to:
+
+        val expectedLineNumbers = normalize(
+            fileText.substring(Regex("// \\d+").find(fileText)!!.range.start + 2)
+                .trim().split(" ").map { it.trim() }.toMutableList()
+        )
+
+Then we are looking for a line that starts with "// " with a number of digits after it. That should work to exclude the comment lines with text. That looks consistent with what the old backend expects from these tests.
+## HF Pred: Why did you change this?
+## Fine Tuned Pred: Remove the `toRegex()` call here.
 -------------------
-@@ -3,8 +3,8 @@ package kara.internal
- /* Test <TYPO descr="Typo: In word 'somthing'">somthing</TYPO> */
- class <TYPO descr="Typo: In word 'Moree'">Moree</TYPO>Fun {
-   fun <TYPO descr="Typo: In word 'wrte'">wrte</TYPO>() {
--    val <TYPO descr="Typo: In word 'childen'">childen</TYPO> = 12
--    val <warning descr="[UNUSED_VARIABLE] Variable 'come' is never used">come</warning> = childen
-+    val <TYPO descr="Typo: In word 'children'">children</TYPO> = 12
-HF Pred: Why did you remove this
-Fine Tuned Pred: I think this should be a warning instead of a warning.
+@@ -45,7 +45,6 @@ import org.jetbrains.kotlin.types.KotlinType
+ import org.jetbrains.kotlin.types.typeUtil.isPrimitiveNumberType
+ import org.jetbrains.kotlin.types.typeUtil.makeNotNullable
+ import org.jetbrains.kotlin.types.typeUtil.makeNullable
+-import java.lang.AssertionError
+## Human Review: Pls remove this file from pr
+## HF Pred: import org.jetbrains.kotlin.util.KotlinConstants;
+## Fine Tuned Pred: Why did you remove this import?
 -------------------
-@@ -0,0 +1,16 @@
-+// IGNORE_BACKEND: JVM
-+// KOTLIN_CONFIGURATION_FLAGS: ASSERTIONS_MODE=jvm
-+
-+class Outer {
-+    class Inner {
-+        fun f() { assert(true) }
-+    }
-+}
-+
-+// We set the assertion status based on top-level classes.
-+// 0 LDC LOuter\$Inner;.class
-+// Outer\$Inner.<clinit>:
-+// 1 LDC LOuter;.class\s*INVOKEVIRTUAL java/lang/Class.desiredAssertionStatus \(\)Z
-+// 1 PUTSTATIC Outer\$Inner.\$assertionsDisabled : Z
-+// Outer\$Inner.f:
-+// 1 GETSTATIC Outer\$Inner.\$assertionsDisabled
-HF Pred: I don't think this is the right place to put this code
-Fine Tuned Pred: Is there a reason why this doesn't go in `src/
--------------------
-@@ -247,7 +245,7 @@ internal abstract class IndicesHandler(protected val context: CommonBackendConte
+@@ -46,6 +46,7 @@ abstract class AbstractCheckLocalVariablesTableTest : CodegenTestCase() {
+             assertNotNull("Couldn't find class file for pattern $classFileRegex in: $pathsString", outputFile)
  
- internal class ArrayIndicesHandler(context: CommonBackendContext) : IndicesHandler(context) {
-HF Pred: }
-Fine Tuned Pred: Should be `ArrayBackend` instead of `CommonBackendContext`.
+             val actualLocalVariables = readLocalVariable(ClassReader(outputFile.asByteArray()), methodName)
++            checkLocalVariableTypes(ClassReader(outputFile.asByteArray()), methodName, actualLocalVariables)
+## Human Review: Could we reuse class reader from previous line?
+## HF Pred: assertEquals(expectedLocalVariables, actualLocalVariables)
+## Fine Tuned Pred: What's the purpose of this line?
 -------------------
-@@ -0,0 +1,17 @@
-+// !DIAGNOSTICS: -UNUSED_PARAMETER
-+class C {
-+    <!OVERLOADS_PRIVATE!>[kotlin.jvm.overloads] private fun foo(s: String = "OK")<!> {
-+    }
-+}
-+
-+fun foo() {
-+    class D {
-+        <!OVERLOADS_PRIVATE!>[kotlin.jvm.overloads] fun foo(s: String = "OK")<!> {
-+        }
-+    }
+@@ -37,3 +37,17 @@ public fun Char.equals(other: Char, ignoreCase: Boolean = false): Boolean {
+  * Returns `true` if this character is a Unicode surrogate code unit.
+  */
+ public fun Char.isSurrogate(): Boolean = this in Char.MIN_SURROGATE..Char.MAX_SURROGATE
 +
-+    val <!UNUSED_VARIABLE!>x<!> = object {
-+        <!OVERLOADS_PRIVATE!>[kotlin.jvm.overloads] fun foo(s: String = "OK")<!> {
-+    }
-+    }
-+}
-HF Pred: I think we can remove `<!UNUSED_VARIABLE!>` here.
-Fine Tuned Pred: Shouldn't this be `-UNUSED_PARAMETER`?
++/**
++ * Minimum value for character
++ * @see <a href="https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html">
+## Human Review: I don't think this way of linking is supported in `@see` tag.  I'm not sure what is the purpose of these links anyway.
+## HF Pred: This should be `Char.isSurrogate()`.
+## Fine Tuned Pred: Please remove the `<a>` tag.
+-------------------
+@@ -175,7 +195,9 @@ class ExpressionCodegen(
+ 
+     override fun visitBlockBody(body: IrBlockBody, data: BlockInfo): StackValue {
+         return body.statements.fold(none()) { _, exp ->
+-            exp.accept(this, data)
++            val result = exp.accept(this, data)
++            (exp as? IrExpression)?.markEndOfStatementIfNeeded()
+## Human Review: ```
+exp.accept(this, data).also {
+       (exp as? IrExpression)?.markEndOfStatementIfNeeded()
+}
+```
+## HF Pred: Why is this needed?
+## Fine Tuned Pred: Unrelated to this PR, but this doesn't seem like the best place to markEndOfStatementIfNeeded. Should the markEndOfStatementIfNeeded be in some other place, or should the markEndOfStatementIfNeeded be in BlockInfoResolver?
 
@@ -658,53 +663,69 @@

Qualitative Evaluation
-------------------
-@@ -1,6 +1,6 @@
- import { expectError, expectType } from 'tsd'
- import DefaultStore from '@uppy/store-default'
--import Uppy, { UIPlugin } from '..'
-+import Uppy, { SuccessResponse, UIPlugin, UppyFile } from '..'
- import type { UploadedUppyFile, FailedUppyFile, PluginOptions } from '..'
-HF Pred: Why do we need to ex
-Fine Tuned Pred: What's the reason for this change?
+@@ -28,6 +28,7 @@
+     "@uppy/thumbnail-generator": "0.29.1",
+     "@uppy/utils": "0.29.1",
+     "classnames": "^2.2.6",
++    "cuid": "^2.1.1",
+     "drag-drop": "2.13.3",
+## Human Review: In other places we are using uuid, I think, is there a reason to use different packages? I was actually gonna suggest we switch to https://github.com/ai/nanoid everywhere, it’s smaller and ids are smaller.
+## HF Pred: This should be `^2.1.0`, right?
+## Fine Tuned Pred: Why are we using a fixed version here?
 -------------------
-@@ -1076,6 +1076,7 @@ class Uppy {
-       const currentProgress = this.getFile(file.id).progress
-       this.setFileState(file.id, {
-         progress: Object.assign({}, currentProgress, {
-+          postprocess: this.postProcessors.length > 0,
-HF Pred: postprocess: this.postProcessors.length > 0,
-Fine Tuned Pred: Why don't we use `getPostProcessors` here as w
+@@ -49,7 +49,8 @@ class Tabs extends Component {
+             tabindex="-1"
+             type="file"
+             name="files[]"
+-            multiple="true"
++            multiple={this.props.maxNumberOfFiles !== 1 || !this.props.maxNumberOfFiles}
+## Human Review: same here
+## HF Pred: `multiple={this.props.maxNumberOfFiles !== 1}`
+## Fine Tuned Pred: shouldnt it be only true if `this.props.maxNumberOfFiles` is defined ?
 -------------------
-@@ -681,6 +681,8 @@ module.exports = class Tus extends Plugin {
-     this.uppy.on('reset-progress', this.handleResetProgress)
+@@ -496,6 +494,25 @@ module.exports = class ProviderView {
+     }
+   }
  
-     if (this.opts.autoRetry) {
-+      this.uppy.log('[Tus] The `autoRetry` option may be removed in Uppy 2.0. See https://github.com/transloadit/uppy/pull/2347 for alternatives.', 'warning')
-HF Pred: this.log('[Tus] The `autoRetry` option may b
-Fine Tuned Pred: This link is broken right now.
++  listAllFiles (path, files = null) {
++    files = files || []
++    return new Promise((resolve) => {
++      this.provider.list(path).then((res) => {
+## Human Review: should we add a `catch` for this call. Otherwise what will happen when the call to `this.provider.list` fails?
+## HF Pred: `files = files || []`
+## Fine Tuned Pred: `resolve` is unnecessary here, you can just call `this.provider.list(path, files)`
 -------------------
-@@ -142,6 +142,7 @@ module.exports = class DragDrop extends Plugin {
-     const restrictions = this.uppy.opts.restrictions
-     return (
-       <input
-+        id={'uppy-input-' + this.id}
-HF Pred: const input
-Fine Tuned Pred: Does it matter that this is prefixed with `uppy-`?
+@@ -422,7 +422,7 @@ module.exports = class ProviderView {
+   }
+ 
+   handleAuth () {
+-    const authState = btoa(JSON.stringify({ origin: location.origin }))
++    const authState = btoa(JSON.stringify({ origin: location.origin, redirect: 'http://localhost:3' }))
+     const link = `${this.provider.authUrl()}?state=${authState}`
+## Human Review: `http://localhost:3` 🤔 
+## HF Pred: I think this should be `https://localhost:3`
+## Fine Tuned Pred: I think it would be better if we could use `location.protocol` here instead of hardcoding `localhost:3`
 -------------------
-@@ -41,7 +41,7 @@ ar_SA.strings = {
-   encoding: 'التشفير...',
-   enterCorrectUrl: 'خطأ في الرابط: ارجو التأكد من ادخال رابط مباشر للملف',
-   enterUrlToImport: 'ادخل الرابط لاستيراد الملفات',
--  exceedsSize: 'الملف اكبر من الحجم المسموح',
-+  exceedsSize: 'الملف اكبر من الحجم المسموح %{size}',
-HF Pred: e
-Fine Tuned Pred: Can you provide the reason for this change?
+@@ -2,6 +2,14 @@
+ 
+ echo "Preparing for end to end test: copying static HTML and CSS, building JS"
+ rm -rf ./test/endtoend/dist && mkdir ./test/endtoend/dist
+-cp ./dist/uppy.min.css ./test/endtoend/dist 
+-cp ./test/endtoend/src/index.html ./test/endtoend/dist 
++rm -rf ./test/endtoend/node_modules
++
++UPPY_VERSION=$(cat package.json | grep version | head -1 | awk -F= "{ print $2 }" | sed 's/[version:,\",]//g' | tr -d '[[:space:]]')
++# archive the uppy package
++npm pack
+## Human Review: I think we should also run the `npm run prepublishOnly` script first here, so that the package is exactly the same as the published one (I thought this would happen automatically inside `npm pack` but it doesn't seem to)
+## HF Pred: Why do we need this?
+## Fine Tuned Pred: Do we want to keep the old version or is there a way to upgrade to a newer version?
 
-

As we can see, the fine-tuned model produces better predictions than the HF model: they are noticeably more insightful and detailed. The HF model tends to produce generic comments, while the fine-tuned model's predictions are more specific to the code under review.

-

Only one thing is weird: the codereviewer model predictions are sometimes cut off and not shown in full. This is not due to the generation length limit, which is fairly high (512 tokens). Unfortunately, finding the reason for this is left to future work.
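If one wanted to investigate this (the notebooks do not), a quick check would be whether the generated id sequences ever emit the tokenizer's EOS token before max_length is reached: a sequence with no EOS was stopped by the length cap rather than finished naturally. The helper below is only a hedged sketch; it assumes preds is the token-id tensor returned by model.generate in the inference notebook.

import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/codereviewer")

def truncated_fraction(preds: torch.Tensor) -> float:
    # Fraction of generated sequences that never produced the EOS token,
    # i.e. generation stopped only because the length limit was reached.
    eos = tokenizer.eos_token_id
    cut = [not (row == eos).any().item() for row in preds]
    return sum(cut) / len(cut)

If most of the visibly cut-off comments do contain an EOS token, the truncation would have to happen after generation (during decoding or display) rather than in the model itself.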

+

As we can see, both models are fairly mediocre at understanding the underlying issue. Still, the HF model tends to produce more generic predictions, while the fine-tuned model's predictions are more specific to the code and show a better understanding of it.

+

Still, both models predict something sensible but struggle to pinpoint the problem.

Quantitative Evaluation

@@ -720,11 +741,11 @@

Quantitative Evaluation -
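The quantitative results themselves are not reproduced in this diff. As a rough, hedged illustration of the kind of score such a comparison produces, one could compute corpus BLEU of each model's comments against the human reviews; the sketch below uses sacrebleu as a stand-in for the notebook's own BLEU utility, and assumes a combined table with the hf_pred, fine_tuned_pred and target columns built in the evaluation notebook (the CSV path is a placeholder).

import pandas as pd
import sacrebleu

df = pd.read_csv("predictions.csv").fillna("")  # placeholder path; columns follow the evaluation notebook
refs = [df["target"].tolist()]  # human review comments as the single reference set
for col in ("hf_pred", "fine_tuned_pred"):
    hyps = df[col].tolist()
    # corpus-level BLEU of the model's review comments against the human ones
    print(col, round(sacrebleu.corpus_bleu(hyps, refs).score, 2))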

diff --git a/searchindex.js b/searchindex.js index 11a4099..6685ffb 100644 --- a/searchindex.js +++ b/searchindex.js
"groupidentifi": 5, "inexteditorgroupview": 5, "addgroup": 5, "locat": 5, "direct": 5, "groupdirect": 5, "iaddgroupopt": 5, "inexteditorgroup": 5, "removegroup": 5, "void": 5, "178": 5, "181": 5, "abstract": 5, "abstractdebugtest": 5, "codegentestcas": 5, "var": 5, "inboxmethod": 5, "vmloop": 5, "while": 5, "val": 5, "eventset": 5, "virtualmachin": 5, "eventqueu": 5, "fix": 5, "bug": 5, "describ": 5, "th": 5, "kara": 5, "intern": 5, "typo": 5, "descr": 5, "word": 5, "somth": 5, "fun": 5, "wrte": 5, "childen": 5, "unused_vari": 5, "variabl": 5, "come": 5, "never": 5, "children": 5, "ignore_backend": 5, "jvm": 5, "kotlin_configuration_flag": 5, "assertions_mod": 5, "outer": 5, "inner": 5, "assert": 5, "statu": 5, "base": 5, "top": 5, "level": 5, "ldc": 5, "louter": 5, "clinit": 5, "invokevirtu": 5, "java": 5, "lang": 5, "desiredassertionstatu": 5, "z": 5, "putstat": 5, "assertionsdis": 5, "getstat": 5, "Is": 5, "reason": 5, "doesn": 5, "src": 5, "247": 5, "245": 5, "indiceshandl": 5, "protect": 5, "context": 5, "commonbackendcont": 5, "arrayindiceshandl": 5, "commonbackendcontext": 5, "arraybackend": 5, "17": 5, "diagnost": 5, "unused_paramet": 5, "overloads_priv": 5, "overload": 5, "foo": 5, "ok": 5, "d": 5, "object": 5, "shouldn": 5, "expecterror": 5, "expecttyp": 5, "tsd": 5, "defaultstor": 5, "store": 5, "default": 5, "uiplugin": 5, "successrespons": 5, "uppyfil": 5, "uploadeduppyfil": 5, "faileduppyfil": 5, "pluginopt": 5, "ex": 5, "what": 5, "1076": 5, "currentprogress": 5, "getfil": 5, "progress": 5, "setfilest": 5, "assign": 5, "postprocess": 5, "postprocessor": 5, "getpostprocessor": 5, "w": 5, "681": 5, "tu": 5, "extend": 5, "plugin": 5, "reset": 5, "handleresetprogress": 5, "autoretri": 5, "log": 5, "mai": 5, "2347": 5, "altern": 5, "b": 5, "broken": 5, "142": 5, "dragdrop": 5, "restrict": 5, "input": 5, "matter": 5, "prefix": 5, "ar_sa": 5, "\u0627\u0644\u062a\u0634\u0641\u064a\u0631": 5, "entercorrecturl": 5, "\u062e\u0637\u0623": 5, "\u0641\u064a": 5, "\u0627\u0644\u0631\u0627\u0628\u0637": 5, "\u0627\u0631\u062c\u0648": 5, "\u0627\u0644\u062a\u0623\u0643\u062f": 5, "\u0645\u0646": 5, "\u0627\u062f\u062e\u0627\u0644": 5, "\u0631\u0627\u0628\u0637": 5, "\u0645\u0628\u0627\u0634\u0631": 5, "\u0644\u0644\u0645\u0644\u0641": 5, "enterurltoimport": 5, "\u0627\u062f\u062e\u0644": 5, "\u0644\u0627\u0633\u062a\u064a\u0631\u0627\u062f": 5, "\u0627\u0644\u0645\u0644\u0641\u0627\u062a": 5, "exceedss": 5, "\u0627\u0644\u0645\u0644\u0641": 5, "\u0627\u0643\u0628\u0631": 5, "\u0627\u0644\u062d\u062c\u0645": 5, "\u0627\u0644\u0645\u0633\u0645\u0648\u062d": 5, "size": 5, "As": 5, "produc": 5, "better": 5, "than": 5, "much": 5, "insight": 5, "detail": 5, "tend": 5, "specif": 5, "thing": 5, "weird": 5, "codereview": 5, "sometim": 5, "cut": 5, "shown": 5, "due": 5, "sentenc": 5, "pretti": 5, "high": 5, "unfortun": 5, "up": 5, "futur": 5, "figur": 5, "behind": 5, "calcul": 5, "score": 5, "measur": 5, "higher": 5, "10169": 5, "25": 5, "71": 5, "91": 5, "84": 5, "perform": 5, "all": 5, "nevertheless": 5, "still": 5, "low": 5, "mean": 5, "hard": 5, "semant": 5}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"codereview": [0, 2, 4], "ml": [0, 2], "perform": [0, 2], "tabl": 2, "content": 2, "bibliographi": 2, "collect": 3, "code": 3, "review": 3, "data": [3, 4], "model": 4, "infer": 4, "1": 4, "token": 4, "dataset": 4, "2": 4, "load": 4, "3": 4, "predict": [4, 5], "function": 4, "huggingfac": 4, "pre": 4, "train": 4, "checkpoint": 4, "fine": 4, "tune": 4, "evalu": 5, "qualit": 5, 
"quantit": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["README", "docs/conclusion", "docs/intro", "notebooks/1_collect_reviews", "notebooks/2_inference", "notebooks/3_evaluation"], "filenames": ["README.md", "docs/conclusion.md", "docs/intro.md", "notebooks/1_collect_reviews.ipynb", "notebooks/2_inference.ipynb", "notebooks/3_evaluation.ipynb"], "titles": ["CodeReviewer ML Performance", "<no title>", "CodeReviewer ML Performance", "Collecting Code Review Data", "CodeReviewer Model Inference", "Predictions Evaluation"], "terms": {"thi": [2, 3, 4, 5], "small": 2, "sampl": [2, 5], "book": 2, "give": 2, "you": [2, 3, 4, 5], "feel": [2, 5], "how": [2, 5], "structur": 2, "It": [2, 5], "show": [2, 5], "off": 2, "few": 2, "major": 2, "file": [2, 3, 5], "type": [2, 5], "well": 2, "some": [2, 4, 5], "doe": [2, 5], "go": 2, "depth": 2, "ani": [2, 5], "particular": 2, "topic": 2, "check": 2, "out": 2, "jupyt": [2, 3], "document": 2, "more": [2, 5], "inform": [2, 5], "page": 2, "bundl": 2, "see": [2, 5], "collect": 2, "code": [2, 4, 5], "review": [2, 4, 5], "data": [2, 5], "model": [2, 5], "infer": 2, "predict": 2, "evalu": 2, "p4v": [2, 4], "codebert": 2, "hug": 2, "face": 2, "space": [2, 5], "p4vv37": 2, "url": 2, "http": [2, 4, 5], "huggingfac": 2, "co": [2, 4], "codebert_codereview": 2, "visit": 2, "2023": 2, "09": 2, "13": [2, 5], "llg": [2, 3, 4, 5], "22": [2, 3, 4, 5], "zhiyu": 2, "li": 2, "shuai": 2, "lu": 2, "daya": 2, "guo": 2, "nan": [2, 5], "duan": 2, "shailesh": 2, "jannu": 2, "grant": 2, "jenk": 2, "deep": 2, "majumd": 2, "jare": 2, "green": 2, "alexei": 2, "svyatkovskii": 2, "shengyu": 2, "fu": 2, "other": [2, 3, 5], "pre": 2, "train": 2, "autom": 2, "activ": 2, "arxiv": 2, "preprint": 2, "2203": 2, "09095": 2, "2022": 2, "In": [3, 5], "notebook": 3, "we": [3, 4, 5], "from": [3, 4, 5], "github": [3, 5], "us": [3, 4, 5], "pygithub": 3, "librari": 3, "interact": 3, "api": 3, "getpass": 3, "import": [3, 4, 5], "auth": 3, "panda": [3, 4, 5], "pd": [3, 4, 5], "tqdm": [3, 4], "autonotebook": [3, 4], "c": [3, 5], "user": [3, 5], "akovr": 3, "appdata": 3, "local": 3, "temp": 3, "ipykernel_15472": 3, "323726258": 3, "py": 3, "5": [3, 4, 5], "tqdmexperimentalwarn": 3, "mode": 3, "instead": [3, 5], "forc": 3, "consol": 3, "e": [3, 4], "g": [3, 4, 5], "although": 3, "can": [3, 4, 5], "without": 3, "authent": [3, 5], "need": [3, 5], "increas": 3, "rate": 3, "limit": 3, "access": [3, 5], "token": 3, "enter": 3, "below": [3, 5], "If": 3, "do": [3, 5], "run": [3, 5], "60": 3, "request": [3, 5], "per": 3, "hour": [3, 4], "your": 3, "els": 3, "warn": 3, "possibl": 3, "next": 3, "defin": [3, 5], "function": [3, 5], "repositori": 3, "def": [3, 4, 5], "collect_review": 3, "repo_nam": 3, "str": 3, "num_com": 3, "int": 3, "1000": [3, 5], "skip_author": 3, "true": [3, 4, 5], "allow_thread": 3, "fals": [3, 4, 5], "save": 3, "max_length": [3, 4], "512": [3, 4], "crawl": 3, "repo": 3, "param": 3, "name": [3, 5], "format": 3, "owner": 3, "number": [3, 5], "comment": [3, 5], "load": 3, "skip": 3, "made": [3, 4], "author": [3, 4, 5], "pull": 3, "allow": [3, 5], "ar": [3, 4, 5], "repli": 3, "csv": [3, 4, 5], "maximum": 3, "length": 3, "diff": 3, 
"hunk": 3, "return": [3, 4, 5], "datafram": [3, 4, 5], "column": 3, "diff_hunk": [3, 4], "human_review": [3, 4], "created_at": 3, "count": 3, "set": [3, 5], "get_repo": 3, "comment_pag": 3, "get_pulls_review_com": 3, "iter": 3, "over": 3, "progress_bar": 3, "total": [3, 5], "len": [3, 4, 5], "have": [3, 4], "enough": 3, "stop": 3, "break": 3, "alreadi": 3, "continu": 3, "too": 3, "long": [3, 4], "get": [3, 5], "commit": 3, "commit_author": 3, "get_git_commit": 3, "commit_id": 3, "add": [3, 4, 5], "along": 3, "ground": 3, "truth": 3, "append": [3, 4], "bodi": [3, 5], "updat": [3, 5], "1": [3, 5], "df": [3, 4, 5], "remov": [3, 5], "keep": [3, 5], "first": [3, 5], "loc": 3, "groupbi": 3, "idxmin": 3, "to_csv": [3, 4], "f": [3, 5], "replac": [3, 5], "_": [3, 5], "final": [3, 5], "follow": [3, 4, 5], "microsoft": [3, 4], "vscode": [3, 5], "jetbrain": [3, 5], "kotlin": [3, 5], "transloadit": 3, "uppi": [3, 5], "i": [3, 4, 5], "chosen": 3, "becaus": 3, "thei": [3, 5], "popular": 3, "larg": 3, "The": [3, 4, 5], "also": [3, 5], "similar": [3, 5], "criteria": 3, "select": 3, "studi": 3, "folder": 3, "addition": 3, "test": [3, 4, 5], "dataset": [3, 5], "zenodo": 3, "avail": [3, 4], "msg": [3, 4, 5], "let": 4, "s": [4, 5], "gener": [4, 5], "pathlib": [4, 5], "path": [4, 5], "numpi": [4, 5], "np": [4, 5], "torch": 4, "util": [4, 5], "dataload": 4, "transform": 4, "autotoken": 4, "automodelforseq2seqlm": 4, "p": 4, "enorm": 4, "thank": 4, "provid": [4, 5], "open": 4, "sourc": 4, "work": [4, 5], "download": 4, "from_pretrain": 4, "requir": [4, 5], "special": 4, "process_token": 4, "class": [4, 5], "reviewsdataset": 4, "__init__": 4, "self": 4, "y": 4, "x": 4, "tensor": 4, "appli": 4, "lambda": 4, "row": 4, "encode_diff": 4, "axi": 4, "dtype": 4, "cpu": 4, "__len__": 4, "__getitem__": 4, "idx": 4, "here": [4, 5], "creat": 4, "each": [4, 5], "project": [4, 5], "filenam": 4, "jetbrains_kotlin_1000": [4, 5], "microsoft_vscode_1000": [4, 5], "transloadit_uppy_1000": [4, 5], "read_csv": [4, 5], "batch_siz": 4, "16": [4, 5], "shuffl": 4, "6": [4, 5], "8gb": 4, "gpu": 4, "now": [4, 5], "two": 4, "devic": 4, "cuda": 4, "eval": 4, "result": [4, 5], "inputs_mask": 4, "ne": 4, "pad_id": 4, "pred": [4, 5], "attention_mask": 4, "use_cach": 4, "num_beam": 4, "early_stop": 4, "num_return_sequ": 4, "decod": 4, "preds_np": 4, "detach": 4, "preds_decod": 4, "skip_special_token": 4, "clean_up_tokenization_spac": 4, "hub": 4, "pretrain": 4, "hf_model": 4, "zip": [4, 5], "df_pred": 4, "target": [4, 5], "with_suffix": [4, 5], "hf_pred": [4, 5], "head": [4, 5], "100": 4, "636": 4, "11": [4, 5], "27": 4, "00": 4, "08": 4, "63": 4, "03": 4, "37": [4, 5], "45": [4, 5], "02": [4, 5], "01": 4, "93": 4, "46": [4, 5], "64": 4, "task": [4, 5], "instruct": 4, "For": [4, 5], "paramet": 4, "learning_r": 4, "3e": 4, "4": [4, 5], "max_source_length": 4, "took": 4, "about": [4, 5], "12": 4, "singl": 4, "nvidia": 4, "geforc": 4, "a100": 4, "wa": [4, 5], "epoch": 4, "waleko": 4, "finetun": 4, "ft_model": 4, "finetuned_pr": [4, 5], "weight": 4, "were": 4, "when": [4, 5], "initi": 4, "t5forconditionalgener": 4, "cls_head": 4, "bia": 4, "IS": 4, "expect": [4, 5], "anoth": 4, "architectur": 4, "bertforsequenceclassif": 4, "bertforpretrain": 4, "NOT": 4, "exactli": [4, 5], "ident": 4, "15": 4, "51": 4, "50": 4, "40": 4, "59": 4, "32": [4, 5], "48": 4, "26": 4, "38": 4, "smooth_bleu": 5, "bleu_fromstr": 5, "analyze_pr": 5, "base_fil": 5, "sample_s": 5, "read": 5, "hf_preds_fil": 5, "fine_tuned_fil": 5, "fine_tun": 5, "put": 5, "fine_tuned_pr": 5, 
"regex": 5, "print": 5, "random_st": 5, "42": 5, "to_numpi": 5, "human": 5, "hf": 5, "fine": 5, "tune": 5, "calc_bleu": 5, "ref": 5, "list": 5, "rang": 5, "char": 5, "join": 5, "split": 5, "rmstop": 5, "calc_bleu_scor": 5, "ft_pred": 5, "inplac": 5, "hf_bleu": 5, "ft_bleu": 5, "bleu": 5, "compar": 5, "four": 5, "7": 5, "frozen_string_liter": 5, "hocon": 5, "bolt": 5, "error": 5, "transportconfig": 5, "attr_accessor": 5, "host": 5, "port": 5, "ssl_cert": 5, "ssl_kei": 5, "ssl_ca_cert": 5, "ssl_cipher_suit": 5, "look": 5, "like": 5, "isn": 5, "t": 5, "why": 5, "am": 5, "surpris": 5, "don": 5, "92": 5, "public": 5, "oauth2authorizedclientargumentresolv": 5, "implement": 5, "handlermeth": 5, "clientregistrationid": 5, "flatmap": 5, "id": 5, "mono": 5, "new": 5, "illegalstateexcept": 5, "unabl": 5, "resolv": 5, "client": 5, "registr": 5, "identifi": 5, "an": 5, "unauthent": 5, "session": 5, "To": 5, "ensur": 5, "serverhttpsecur": 5, "anonym": 5, "configur": 5, "string": 5, "registrationid": 5, "gett1": 5, "usernam": 5, "gett2": 5, "think": 5, "should": 5, "chang": 5, "sinc": 5, "reactiv": 5, "side": 5, "support": 5, "messag": 5, "clear": 5, "me": 5, "3": 5, "const": 5, "share": 5, "assert": 5, "setupdatabas": 5, "script": 5, "vm": 5, "chai": 5, "normalizedfunctionstr": 5, "bson": 5, "lib": 5, "parser": 5, "buffer": 5, "safe": 5, "free": 5, "edit": 5, "section": 5, "introduc": 5, "modern": 5, "featur": 5, "object": 5, "destructur": 5, "what": 5, "whitespac": 5, "0": 5, "17": 5, "php": 5, "copyright": 5, "bold": 5, "brand": 5, "commerc": 5, "sp": 5, "z": 5, "o": 5, "all": 5, "right": 5, "reserv": 5, "licens": 5, "txt": 5, "detail": 5, "declar": 5, "strict_typ": 5, "namespac": 5, "ergonod": 5, "core": 5, "infrastructur": 5, "except": 5, "serializationexcept": 5, "extend": 5, "serializerexcept": 5, "__construct": 5, "throwabl": 5, "previou": 5, "null": 5, "parent": 5, "been": 5, "sharedkernel": 5, "modul": 5, "miss": 5, "header": 5, "separ": 5, "seem": 5, "veri": 5, "specif": 5, "elgg": 5, "473": 5, "describ": 5, "gridf": 5, "stream": 5, "fail": 5, "tri": 5, "abort": 5, "uploadstream": 5, "tostr": 5, "equal": 5, "todo": 5, "node": 5, "3405": 5, "mongostreamclosederror": 5, "mongodrivererror": 5, "cannot": 5, "call": 5, "twice": 5, "close": 5, "done": 5, "ve": 5, "depend": 5, "Is": 5, "still": 5, "relev": 5, "problem": 5, "340": 5, "341": 5, "export": 5, "debugeditorcontribut": 5, "idebugeditorcontribut": 5, "runonceschedul": 5, "hoverwidget": 5, "hide": 5, "hover_delai": 5, "memoiz": 5, "privat": 5, "providenondebughoverschedul": 5, "where": 5, "dispos": 5, "runnabl": 5, "53": 5, "sasspars": 5, "cssparser": 5, "sass": 5, "variabl": 5, "font": 5, "size": 5, "12px": 5, "_parsevariabledeclar": 5, "panic": 5, "scanner": 5, "tokentyp": 5, "variabledeclar": 5, "var": 5, "cssvariabledeclar": 5, "super": 5, "_parsecssvariabledeclar": 5, "That": 5, "wrong": 5, "Not": 5, "place": 5, "suit": 5, "css": 5, "nit": 5, "after": 5, "minor": 5, "style": 5, "issu": 5, "befor": 5, "537": 5, "23": 5, "interfac": 5, "completionlist": 5, "item": 5, "completionitem": 5, "contain": 5, "addit": 5, "context": 5, "which": 5, "complet": 5, "completionitemprovid": 5, "providecompletionitem": 5, "trigger": 5, "completioncontext": 5, "manual": 5, "invoc": 5, "24x7": 5, "intellisens": 5, "incomplet": 5, "just": 5, "reason": 5, "439": 5, "441": 5, "debugservic": 5, "debug": 5, "idebugservic": 5, "debounceddisplaythread": 5, "rawdebugsess": 5, "timer": 5, "displaythreadstim": 5, "getid": 5, "cleartimeout": 5, "settimeout": 5, 
"fetchthread": 5, "undefin": 5, "onunexpectederror": 5, "make": 5, "m": 5, "sure": 5, "best": 5, "wai": 5, "would": 5, "better": 5, "differ": 5, "between": 5, "265": 5, "266": 5, "33": 5, "mddocumentcontentprovid": 5, "textdocumentcontentprovid": 5, "300": 5, "documentheadingsprovid": 5, "documentsymbolprovid": 5, "daringfirebal": 5, "net": 5, "markdown": 5, "syntax": 5, "static": 5, "_atxpattern": 5, "_settext": 5, "pick": 5, "up": 5, "_atxregex": 5, "directli": 5, "symbolprovid": 5, "24": 5, "10": 5, "abstract": 5, "abstractirlinenumbertest": 5, "abstractlinenumbertest": 5, "overrid": 5, "fun": 5, "comparecustom": 5, "psifil": 5, "ktfile": 5, "wholefil": 5, "val": 5, "filetext": 5, "text": 5, "expectedlinenumb": 5, "normal": 5, "substr": 5, "indexof": 5, "2": 5, "trim": 5, "map": 5, "tomutablelist": 5, "n": 5, "toregex": 5, "filter": 5, "line": 5, "simplifi": 5, "d": 5, "find": 5, "start": 5, "Then": 5, "digit": 5, "exclud": 5, "consist": 5, "old": 5, "backend": 5, "did": 5, "org": 5, "kotlintyp": 5, "typeutil": 5, "isprimitivenumbertyp": 5, "makenotnul": 5, "makenul": 5, "java": 5, "lang": 5, "assertionerror": 5, "pl": 5, "pr": 5, "kotlinconst": 5, "abstractchecklocalvariablestabletest": 5, "codegentestcas": 5, "assertnotnul": 5, "couldn": 5, "pattern": 5, "classfileregex": 5, "pathsstr": 5, "outputfil": 5, "actuallocalvari": 5, "readlocalvari": 5, "classread": 5, "asbytearrai": 5, "methodnam": 5, "checklocalvariabletyp": 5, "could": 5, "reus": 5, "reader": 5, "assertequ": 5, "expectedlocalvari": 5, "purpos": 5, "ignorecas": 5, "boolean": 5, "charact": 5, "unicod": 5, "surrog": 5, "unit": 5, "issurrog": 5, "min_surrog": 5, "max_surrog": 5, "minimum": 5, "valu": 5, "href": 5, "doc": 5, "oracl": 5, "com": 5, "javas": 5, "tutori": 5, "nutsandbolt": 5, "datatyp": 5, "html": 5, "link": 5, "tag": 5, "anywai": 5, "pleas": 5, "175": 5, "195": 5, "9": 5, "expressioncodegen": 5, "visitblockbodi": 5, "irblockbodi": 5, "blockinfo": 5, "stackvalu": 5, "statement": 5, "fold": 5, "none": 5, "exp": 5, "accept": 5, "irexpress": 5, "markendofstatementifneed": 5, "unrel": 5, "doesn": 5, "blockinforesolv": 5, "28": 5, "thumbnail": 5, "29": 5, "classnam": 5, "cuid": 5, "drag": 5, "drop": 5, "uuid": 5, "packag": 5, "actual": 5, "gonna": 5, "suggest": 5, "switch": 5, "ai": 5, "nanoid": 5, "everywher": 5, "smaller": 5, "fix": 5, "version": 5, "49": 5, "8": 5, "tab": 5, "compon": 5, "tabindex": 5, "multipl": 5, "prop": 5, "maxnumberoffil": 5, "same": 5, "shouldnt": 5, "onli": 5, "496": 5, "494": 5, "25": 5, "providerview": 5, "listallfil": 5, "promis": 5, "re": 5, "catch": 5, "otherwis": 5, "happen": 5, "unnecessari": 5, "422": 5, "handleauth": 5, "authstat": 5, "btoa": 5, "json": 5, "stringifi": 5, "origin": 5, "locat": 5, "redirect": 5, "localhost": 5, "authurl": 5, "state": 5, "protocol": 5, "hardcod": 5, "14": 5, "echo": 5, "prepar": 5, "end": 5, "copi": 5, "build": 5, "js": 5, "rm": 5, "rf": 5, "endtoend": 5, "dist": 5, "mkdir": 5, "cp": 5, "min": 5, "src": 5, "index": 5, "node_modul": 5, "uppy_vers": 5, "cat": 5, "grep": 5, "awk": 5, "sed": 5, "tr": 5, "archiv": 5, "npm": 5, "pack": 5, "prepublishonli": 5, "so": 5, "publish": 5, "one": 5, "thought": 5, "automat": 5, "insid": 5, "want": 5, "upgrad": 5, "newer": 5, "As": 5, "pretti": 5, "mediocr": 5, "understand": 5, "But": 5, "tend": 5, "produc": 5, "while": 5, "both": 5, "someth": 5, "sensibl": 5, "struggl": 5, "pin": 5, "point": 5, "calcul": 5, "score": 5, "measur": 5, "higher": 5, "10169": 5, "34": 5, "39": 5, "55": 5, "84": 5, "perform": 5, "slightli": 
5, "than": 5, "nevertheless": 5, "low": 5, "hard": 5}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"codereview": [0, 2, 4], "ml": [0, 2], "perform": [0, 2], "tabl": 2, "content": 2, "bibliographi": 2, "collect": 3, "code": 3, "review": 3, "data": [3, 4], "model": 4, "infer": 4, "1": 4, "token": 4, "dataset": 4, "2": 4, "load": 4, "3": 4, "predict": [4, 5], "function": 4, "huggingfac": 4, "pre": 4, "train": 4, "checkpoint": 4, "fine": 4, "tune": 4, "evalu": 5, "qualit": 5, "quantit": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file