Skip to content

Commit

Permalink
fix strings lengths bug
Browse files Browse the repository at this point in the history
  • Loading branch information
waleko committed Sep 16, 2023
1 parent 052946f commit b691824
Show file tree
Hide file tree
Showing 10 changed files with 12,399 additions and 12,393 deletions.
942 changes: 471 additions & 471 deletions data/JetBrains_kotlin_1000.finetuned_pred.csv

Large diffs are not rendered by default.

830 changes: 415 additions & 415 deletions data/JetBrains_kotlin_1000.hf_pred.csv

Large diffs are not rendered by default.

1,050 changes: 525 additions & 525 deletions data/microsoft_vscode_1000.finetuned_pred.csv

Large diffs are not rendered by default.

986 changes: 493 additions & 493 deletions data/microsoft_vscode_1000.hf_pred.csv

Large diffs are not rendered by default.

9,286 changes: 4,643 additions & 4,643 deletions data/msg-test.finetuned_pred.csv

Large diffs are not rendered by default.

9,248 changes: 4,624 additions & 4,624 deletions data/msg-test.hf_pred.csv

Large diffs are not rendered by default.

924 changes: 462 additions & 462 deletions data/transloadit_uppy_1000.finetuned_pred.csv

Large diffs are not rendered by default.

902 changes: 451 additions & 451 deletions data/transloadit_uppy_1000.hf_pred.csv

Large diffs are not rendered by default.

71 changes: 22 additions & 49 deletions notebooks/2_inference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,12 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 57,
"id": "initial_id",
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
Expand Down Expand Up @@ -58,24 +49,12 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 58,
"id": "ad4d16d13804be69",
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)okenizer_config.json: 100%|██████████| 1.29k/1.29k [00:00<00:00, 169kB/s]\n",
"Downloading (…)olve/main/vocab.json: 100%|██████████| 575k/575k [00:00<00:00, 5.89MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100%|██████████| 294k/294k [00:00<00:00, 1.09MB/s]\n",
"Downloading (…)in/added_tokens.json: 100%|██████████| 1.87k/1.87k [00:00<00:00, 1.13MB/s]\n",
"Downloading (…)cial_tokens_map.json: 100%|██████████| 913/913 [00:00<00:00, 515kB/s]\n"
]
}
],
"outputs": [],
"source": [
"# download tokenizer from huggingface\n",
"tokenizer = AutoTokenizer.from_pretrained(\"microsoft/codereviewer\")\n",
Expand All @@ -86,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 59,
"id": "cb003d6d8f578da1",
"metadata": {
"collapsed": false
Expand Down Expand Up @@ -120,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 60,
"id": "d06f51b2150c61c4",
"metadata": {
"collapsed": false
Expand Down Expand Up @@ -167,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 61,
"id": "7a5b97449733bbc6",
"metadata": {
"collapsed": false
Expand All @@ -192,11 +171,11 @@
" )\n",
" # decode the predictions\n",
" preds_np = preds.detach().cpu().numpy()\n",
" preds_decoded = np.apply_along_axis(lambda row: tokenizer.decode(\n",
" row[2:], skip_special_tokens=True, clean_up_tokenization_spaces=False\n",
" ), 1, preds_np)\n",
" preds_decoded = [tokenizer.decode(row[2:],\n",
" skip_special_tokens=True,\n",
" clean_up_tokenization_spaces=False) for row in preds_np]\n",
" # add the decoded predictions to the result\n",
" result += list(preds_decoded)\n",
" result += preds_decoded\n",
" return result"
]
},
Expand All @@ -215,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 62,
"id": "c508661efcdcad40",
"metadata": {
"collapsed": false
Expand All @@ -225,13 +204,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 478kB/s]\n",
"Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:02<00:00, 306MB/s] \n",
"Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 36.7kB/s]\n",
"100%|██████████| 636/636 [11:08<00:00, 1.05s/it]\n",
"100%|██████████| 63/63 [03:31<00:00, 3.35s/it]\n",
"100%|██████████| 63/63 [01:57<00:00, 1.87s/it]\n",
"100%|██████████| 63/63 [02:39<00:00, 2.53s/it]\n"
"100%|██████████| 636/636 [11:27<00:00, 1.08s/it]\n",
"100%|██████████| 63/63 [03:37<00:00, 3.45s/it]\n",
"100%|██████████| 63/63 [02:01<00:00, 1.93s/it]\n",
"100%|██████████| 63/63 [02:46<00:00, 2.64s/it]\n"
]
}
],
Expand Down Expand Up @@ -270,7 +246,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 63,
"id": "851255e54c49484a",
"metadata": {
"collapsed": false
Expand All @@ -280,16 +256,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading (…)lve/main/config.json: 100%|██████████| 2.13k/2.13k [00:00<00:00, 290kB/s]\n",
"Downloading pytorch_model.bin: 100%|██████████| 892M/892M [00:22<00:00, 39.4MB/s] \n",
"Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.bias', 'cls_head.weight']\n",
"Some weights of the model checkpoint at waleko/codereviewer-finetuned-msg were not used when initializing T5ForConditionalGeneration: ['cls_head.weight', 'cls_head.bias']\n",
"- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Downloading (…)neration_config.json: 100%|██████████| 168/168 [00:00<00:00, 49.2kB/s]\n",
"100%|██████████| 636/636 [15:58<00:00, 1.51s/it]\n",
"100%|██████████| 63/63 [01:41<00:00, 1.61s/it]\n",
"100%|██████████| 63/63 [01:32<00:00, 1.47s/it]\n",
"100%|██████████| 63/63 [01:27<00:00, 1.39s/it]\n"
"100%|██████████| 636/636 [15:51<00:00, 1.50s/it]\n",
"100%|██████████| 63/63 [01:40<00:00, 1.59s/it]\n",
"100%|██████████| 63/63 [01:32<00:00, 1.48s/it]\n",
"100%|██████████| 63/63 [01:26<00:00, 1.38s/it]\n"
]
}
],
Expand Down
Loading

0 comments on commit b691824

Please sign in to comment.