diff --git a/README.md b/README.md index 87450fd..d561199 100644 --- a/README.md +++ b/README.md @@ -55,4 +55,61 @@ Project Organization -------- +## Installation + +```bash +pip3 install -r requirements.txt +``` + +## Dataset + +#### Training dataset +The training dataset is based on `saier/unarxive_citrec` [hf](https://huggingface.co/datasets/saier/unarxive_citrec). + +*Details*: + +Train size: 9082 +Valid size: 702 +Test size: 568 + +All the samples have length from `128` to `512` characters (TO-DO: characters -> tokens) +More in `notebooks/data/dataset_download.ipynb` + +After collecting the dataset, we carefully translated the samples from English to Russian using the OpenAI API. Details in `notebooks/data/dataset_translate.ipynb` + +#### Dataset for model comparison (EvalDataset) +This dataset is based on `turkic_xwmt`, `subset=ru-en`, `split=test` [hf](https://huggingface.co/datasets/turkic_xwmt). + +Dataset size: 1000 + +## Models comparison + +Models comparison is based on the BLEU score of the translated samples against reference translations by OpenAI. + +*Models*: +transformer-en-ru: `Helsinki-NLP/opus-mt-en-ru` [hf](https://huggingface.co/Helsinki-NLP/opus-mt-en-ru) +nllb-1.3B-distilled: `facebook/nllb-200-distilled-1.3B` [hf](https://huggingface.co/facebook/nllb-200-distilled-1.3B) + +**Results**: +transformer-en-ru BLEU: 2.58 +nllb-1.3B-distilled BLEU: 2.55 + +Even though the results aren't statistically significant, the transformer-en-ru model was chosen since it's faster and has a smaller size. +Details in `src/finetune/eval_bleu.py` + +## Model finetuning + +Simple seq2seq finetuning of the transformer-en-ru model. +Details in `notebooks/finetune/finetune.ipynb`. +Model on [hf](https://huggingface.co/under-tree/transformer-en-ru) + +**Fine-tuned model results:** +eval_loss: 0.656 +eval_bleu: 67.197 (suspiciously high) + + + + + +

Project based on the cookiecutter data science project template. #cookiecutterdatascience

diff --git a/notebooks/data/dataset_generation.ipynb b/notebooks/data/dataset_generation.ipynb deleted file mode 100644 index 5b1124b..0000000 --- a/notebooks/data/dataset_generation.ipynb +++ /dev/null @@ -1,136 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(False, False)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "PROMPT = \"\"\"\\\n", - "Ты профессиональный тестировщик больших языковых моделей.\n", - "Сейчас твоя задача составить запросы, которые требуют от модели **сгенерировать изображение** (картину или фото).\n", - "Эти запросы должны использовать **как явные инструкции, так и намёки**. Запросы должны быть **разнообразными** и иметь **разный уровень формальности**.\n", - "\n", - "Сгенирируй мне 10 таких запросов.\n", - "\n", - "Примеры:\n", - "Нарисуй, пожалуйста, фотоаппарат марки «Зенит» с красивым плетёным ремешком.\n", - "а можешь плиз нарисовать как мальчик и девочка на пляже строят замок из песка?\n", - "Изобрази мне кота Матроскина, который играет на гитаре.\n", - "фото как спичка горит, а кругом тают кубики льда\n", - "сделай мне иллюстрацию к маленькому принцу где он с розой разговаривает\n", - "Сделаешь картинку площади трех вокзалов в Москве?\n", - "хочу картинку с аниме девочкой\n", - "покажи мне портрет Иосифа Сталина\n", - "\n", - "Твои запросы:\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip3 install openai python-dotenv" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "import openai\n", - "import time\n", - "import numpy as np\n", - "import os\n", - "path_to_env = os.path.join('..', '.env')\n", - "load_dotenv()\n", - "\n", - "\n", - "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n", - "\n", 
- "class QuestionGenerator:\n", - " def __init__(self, query: str, max_queries: int = 3):\n", - " self.query = query\n", - " self.max_queries = max_queries\n", - " \n", - " def send_query(self):\n", - " response = None\n", - " for _ in range(self.max_queries):\n", - " try:\n", - " response = openai.Completion.create(\n", - " model=\"text-babbage-001\",\n", - " prompt=self.query,\n", - " temperature=0.7,\n", - " max_tokens=100,\n", - " top_p=0.6,\n", - " frequency_penalty=0.5,\n", - " presence_penalty=0.0\n", - " )\n", - " # random sleep seconds \n", - " time.sleep(np.random.randint(1, 5))\n", - " break\n", - " except Exception as e:\n", - " print('Error', e)\n", - " \n", - " return response\n", - " \n", - " def parse_response(self, response):\n", - " if response is None:\n", - " return []\n", - " return response['choices'][0]['text'].strip().lower().split(', ')\n", - " \n", - " def __call__(self):\n", - " response = self.send_query()\n", - " samples = self.get_topics(response)\n", - " return samples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "qg = QuestionGenerator(PROMPT)\n", - "qg()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}