# Code Review Automation with Language Models
## Overview

Code review is a crucial aspect of the software development process, ensuring that code changes are thoroughly examined for quality, security, and adherence to coding standards. However, the code review process can be time-consuming, and human reviewers may overlook certain issues. To address these challenges, we have developed a Code Review Automation system powered by language models.

Our system leverages state-of-the-art language models to generate code reviews automatically. These models are trained on a vast corpus of code and can provide insightful feedback on code changes. By automating part of the code review process, our system aims to:

- Speed up the code review process.
- Identify common code issues and provide recommendations.
- Assist developers in producing higher-quality code.

## Key Features

### 1. Data Collection

Our system collects code review data from popular GitHub repositories. This data includes code changes and the associated human-authored code reviews. By leveraging this data, our models learn to generate contextually relevant code reviews.
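As a rough illustration of what such collection can look like, the sketch below pairs review comments with their diff hunks for a single pull request via the GitHub REST API. The helper name, the use of `requests`, the token handling, and the example repository are assumptions for illustration; the project's notebooks implement the actual pipeline.

```python
# Illustrative sketch only; the repository and PR number below are placeholders.
# Assumes a GitHub personal access token in the GITHUB_TOKEN environment variable.
import os
import requests

GITHUB_API = "https://api.github.com"
HEADERS = {
    "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    "Accept": "application/vnd.github+json",
}

def collect_review_pairs(repo: str, pull_number: int) -> list[dict]:
    """Pair each human review comment on a pull request with the diff hunk it refers to."""
    url = f"{GITHUB_API}/repos/{repo}/pulls/{pull_number}/comments"
    comments = requests.get(url, headers=HEADERS, timeout=30).json()
    pairs = []
    for c in comments:
        pairs.append({
            "diff_hunk": c.get("diff_hunk", ""),  # the code change being discussed
            "review": c.get("body", ""),          # the human-authored review comment
            "path": c.get("path", ""),            # file the comment applies to
        })
    return pairs

# Example usage (placeholder repository and PR number):
# pairs = collect_review_pairs("octocat/Hello-World", 1)
```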
### 2. Model Inference and Fine-Tuning

We use pre-trained language models and fine-tune them on code review datasets. Fine-tuning allows the models to specialize in generating code reviews, making them more effective at this task. Once the models are trained, they can generate code reviews for new code changes. These generated reviews can highlight potential issues, suggest improvements, and provide feedback to developers.
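Below is a minimal inference sketch using the Hugging Face `transformers` library. The checkpoint name `microsoft/codereviewer`, the example diff, and the generation parameters are assumptions made for illustration; the project's own pre-trained or fine-tuned checkpoints can be substituted.

```python
# Minimal inference sketch with Hugging Face transformers.
# The checkpoint name is an assumption; substitute the project's own
# pre-trained or fine-tuned checkpoint as appropriate.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

checkpoint = "microsoft/codereviewer"  # assumed base checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# A toy diff hunk standing in for a real code change.
diff_hunk = """@@ -10,7 +10,7 @@ def fetch(url):
-    return requests.get(url)
+    return requests.get(url, timeout=30)
"""

inputs = tokenizer(diff_hunk, return_tensors="pt", truncation=True, max_length=512)
outputs = model.generate(**inputs, max_new_tokens=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```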
### 3. Evaluation Metrics

We use the BLEU-4 score metric to assess the quality of generated code reviews. This metric measures the similarity between model-generated reviews and the target human reviews. While our models provide valuable assistance, they are designed to complement human reviewers.
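For illustration, BLEU-4 can be computed along the following lines with NLTK. The exact BLEU implementation and smoothing used by the project may differ, so this sketch is not meant to reproduce reported scores; the reference and hypothesis strings are made-up examples.

```python
# Illustrative BLEU-4 computation with NLTK; the project may use a different
# BLEU implementation, so scores are not directly comparable to reported numbers.
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

# One reference (human review) per hypothesis (model-generated review), tokenized.
references = [["consider adding a timeout to this request".split()]]
hypotheses = ["add a timeout to the request".split()]

bleu4 = corpus_bleu(
    references,
    hypotheses,
    weights=(0.25, 0.25, 0.25, 0.25),               # BLEU-4: equal weight on 1- to 4-grams
    smoothing_function=SmoothingFunction().method1,  # avoids zero scores on short reviews
)
print(f"BLEU-4: {bleu4:.4f}")
```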
## Getting Started

To get started with our Code Review Automation system, follow these steps:

1. Clone this repository to your local machine:

   ```bash
   git clone https://github.com/waleko/Code-Review-Automation-LM.git
   cd Code-Review-Automation-LM
   ```

2. Set up the required dependencies and environment (see `requirements.txt`).
3. Run the provided notebooks to explore data collection, model inference, and evaluation.
4. Integrate the code review automation system into your development workflow. You can use our pre-trained models or fine-tune them on your specific codebase for even better results.

## License

This project is licensed under the Apache 2.0 License - see the LICENSE file for details.

## Contact

For any questions or inquiries, please contact inbox@alexkovrigin.me.
## Quantitative Evaluation

Additionally, we will be using the test data from [LLG+22] and their dataset on Zenodo. This dataset is available at `data/msg-test.csv`.
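A quick way to inspect this split is to load it with pandas. The sketch below assumes only the file path mentioned above and does not rely on particular column names, since the dataset's exact layout is not described here.

```python
# Quick inspection of the [LLG+22] test split; column names are not assumed here.
import pandas as pd

df = pd.read_csv("data/msg-test.csv")
print(df.shape)                # number of test examples and columns
print(df.columns.tolist())     # actual column layout of the dataset
print(df.head(3))              # a few sample rows
```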
As we can see, the fine-tuned model performs slightly better than the HF model on all datasets. Nevertheless, the score is still pretty low (as the authors of [LLG+22] put it: "it is a hard task").