Skip to content

Commit

Permalink
run predictions on fixed datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
waleko committed Sep 16, 2023
1 parent 36af64c commit 052946f
Show file tree
Hide file tree
Showing 10 changed files with 38,057 additions and 212,022 deletions.
65,397 changes: 6,334 additions & 59,063 deletions data/JetBrains_kotlin_1000.finetuned_pred.csv

Large diffs are not rendered by default.

65,389 changes: 6,330 additions & 59,059 deletions data/JetBrains_kotlin_1000.hf_pred.csv

Large diffs are not rendered by default.

30,537 changes: 7,235 additions & 23,302 deletions data/microsoft_vscode_1000.finetuned_pred.csv

Large diffs are not rendered by default.

30,557 changes: 7,245 additions & 23,312 deletions data/microsoft_vscode_1000.hf_pred.csv

Large diffs are not rendered by default.

246 changes: 123 additions & 123 deletions data/msg-test.finetuned_pred.csv

Large diffs are not rendered by default.

84 changes: 42 additions & 42 deletions data/msg-test.hf_pred.csv

Large diffs are not rendered by default.

28,437 changes: 5,015 additions & 23,422 deletions data/transloadit_uppy_1000.finetuned_pred.csv

Large diffs are not rendered by default.

28,783 changes: 5,188 additions & 23,595 deletions data/transloadit_uppy_1000.hf_pred.csv

Large diffs are not rendered by default.

74 changes: 63 additions & 11 deletions notebooks/2_inference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "initial_id",
"metadata": {
"collapsed": true
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from pathlib import Path\n",
"\n",
Expand Down Expand Up @@ -49,12 +58,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "ad4d16d13804be69",
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]\n",
"Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]\n",
"Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]\n",
"Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]\n"
]
}
],
"source": [
"# download tokenizer from huggingface\n",
"tokenizer = AutoTokenizer.from_pretrained(\"microsoft/codereviewer\")\n",
Expand All @@ -65,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "cb003d6d8f578da1",
"metadata": {
"collapsed": false
Expand Down Expand Up @@ -99,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "d06f51b2150c61c4",
"metadata": {
"collapsed": false
Expand Down Expand Up @@ -146,7 +167,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "7a5b97449733bbc6",
"metadata": {
"collapsed": false
Expand Down Expand Up @@ -194,12 +215,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "c508661efcdcad40",
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 478kB/s]\n",
"Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:02<00:00, 306MB/s] \n",
"Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 36.7kB/s]\n",
"100%|██████████| 636/636 [11:08<00:00, 1.05s/it]\n",
"100%|██████████| 63/63 [03:31<00:00, 3.35s/it]\n",
"100%|██████████| 63/63 [01:57<00:00, 1.87s/it]\n",
"100%|██████████| 63/63 [02:39<00:00, 2.53s/it]\n"
]
}
],
"source": [
"# download the pretrained model from huggingface\n",
"hf_model = AutoModelForSeq2SeqLM.from_pretrained(\"microsoft/codereviewer\")\n",
Expand Down Expand Up @@ -235,12 +270,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "851255e54c49484a",
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]\n",
"Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] \n",
"Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']\n",
"- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]\n",
"100%|██████████| 636/636 [15:58<00:00, 1.51s/it]\n",
"100%|██████████| 63/63 [01:41<00:00, 1.61s/it]\n",
"100%|██████████| 63/63 [01:32<00:00, 1.47s/it]\n",
"100%|██████████| 63/63 [01:27<00:00, 1.39s/it]\n"
]
}
],
"source": [
"# download the fine-tuned model\n",
"ft_model = AutoModelForSeq2SeqLM.from_pretrained(\"waleko/codereviewer-finetuned-msg\")\n",
Expand Down
Loading

0 comments on commit 052946f

Please sign in to comment.