From b1e6bdb14733835d8180bee300886422570daa9b Mon Sep 17 00:00:00 2001 From: Andrey Snytin Date: Fri, 2 Feb 2024 14:21:30 +0100 Subject: [PATCH] Script to add mypy per-line ignores (#273) --- .gitignore | 2 +- tools/mypy_ignores/README.md | 1 + tools/mypy_ignores/mypy_ignores.ipynb | 184 ++++++++++++++++++++++++++ 3 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 tools/mypy_ignores/README.md create mode 100644 tools/mypy_ignores/mypy_ignores.ipynb diff --git a/.gitignore b/.gitignore index b46120f94..8c1d53167 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ build .tmp.env* .coverage .python-version -*.ipynb +.ipynb_checkpoints .ruff_cache .idea __pycache__ diff --git a/tools/mypy_ignores/README.md b/tools/mypy_ignores/README.md new file mode 100644 index 000000000..5e0c854ba --- /dev/null +++ b/tools/mypy_ignores/README.md @@ -0,0 +1 @@ +Simple script to parse mypy logs from CI and add per-line ignores to source code. \ No newline at end of file diff --git a/tools/mypy_ignores/mypy_ignores.ipynb b/tools/mypy_ignores/mypy_ignores.ipynb new file mode 100644 index 000000000..d4e761cd2 --- /dev/null +++ b/tools/mypy_ignores/mypy_ignores.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d4a46946-bc43-400d-a25f-cdf7564c702e", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import datetime\n", + "from collections import defaultdict\n", + "\n", + "import attr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "106a80be-5228-4962-8f1a-12a222404e03", + "metadata": {}, + "outputs": [], + "source": [ + "LOG_FILENAME = \"~/Downloads/mypy_20240130.log\"\n", + "REPO_BASE_PATH = \"~/dev/datalens-backend\"\n", + "\n", + "PKG_RUN_PREFIX = \"Cmd: ['mypy', '--cache-dir=/tmp/mypy_cache']; cwd=data/\"\n", + "CWD_PREFIX = \"/src/\" # \"/data/\"\n", + "pkg_run_cmd_re= re.compile(\n", + " \"\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z Cmd: \\['mypy', '\\-\\-cache-dir=/tmp/mypy_cache'\\]; cwd=(.+)\"\n", + ")\n", + "\n", + "err_line_re = re.compile(\n", + " \"\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z (?P[^:]+):(?P\\d+): (?P\\w+): (?P.*)\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0859bc96-d443-4b3e-a338-616a8ca04978", + "metadata": {}, + "outputs": [], + "source": [ + "@attr.s\n", + "class ErrInfo:\n", + " pkg: str = attr.ib()\n", + " path: str = attr.ib()\n", + " line: int = attr.ib() # starting from 0\n", + " msg_list: list[str] = attr.ib()\n", + "\n", + " def get_comment(self) -> str:\n", + " # Use only first err message if there are more than one per line\n", + " return f\" # type: ignore # {datetime.date.today().isoformat()} # TODO: {self.msg_list[0]}\"\n", + "\n", + " def get_full_path(self) -> str:\n", + " return os.path.join(self.pkg, self.path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ad5054d-88b7-473f-88ff-4683c4106c5c", + "metadata": {}, + "outputs": [], + "source": [ + "file_errs: defaultdict[str: list[ErrInfo]] = defaultdict(list)\n", + "\n", + "with open(LOG_FILENAME, \"r\") as log_file:\n", + " pkg_name = None\n", + " for line in log_file:\n", + " line = line.strip()\n", + " run_cmd_match = pkg_run_cmd_re.match(line)\n", + " if run_cmd_match:\n", + " pkg_name = run_cmd_match.groups()[0].replace(CWD_PREFIX, \"\")\n", + "\n", + " match = err_line_re.match(line)\n", + " if match:\n", + " groups = match.groupdict()\n", + "\n", + " if not pkg_name:\n", + " continue\n", + "\n", + " if groups[\"status\"] == \"error\":\n", + " err_info = ErrInfo(\n", + " pkg=pkg_name,\n", + " path=groups[\"path\"],\n", + " line=int(groups[\"line\"]) - 1,\n", + " msg_list=[groups[\"msg\"]],\n", + " )\n", + " assert err_info.line >= 0\n", + "\n", + " file_errs[err_info.get_full_path()].append(err_info)\n", + "\n", + "print(len(file_errs))\n", + "\n", + "# Merge single row errors\n", + "for file_path, ei_list in file_errs.items():\n", + " new_ei_list = []\n", + " prev_ei = None\n", + " for ei in ei_list:\n", + " if not prev_ei:\n", + " new_ei_list.append(ei)\n", + " prev_ei = ei\n", + " else:\n", + " if ei.line == prev_ei.line:\n", + " prev_ei.msg_list.extend(ei.msg_list)\n", + " else:\n", + " new_ei_list.append(ei)\n", + " prev_ei = ei\n", + " file_errs[file_path] = new_ei_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4f45bd1-5c56-4d0d-9937-5fa12350832b", + "metadata": {}, + "outputs": [], + "source": [ + "paths = list(\n", + " filter(\n", + " lambda t: (\n", + " # t.startswith(\"lib/\")\n", + " # (\n", + " # t.startswith(\"lib/\") or \n", + " # t.startswith(\"app/\")\n", + " # ) and \n", + " # not t.startswith(\"lib/dl_formula/dl_formula/parser/antlr/\")\n", + " ), \n", + " file_errs.keys(),\n", + " )\n", + ")\n", + "len(paths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c74069c0-735f-46b1-b23f-8dbec456157c", + "metadata": {}, + "outputs": [], + "source": [ + "for filename in file_errs.keys():\n", + " path = os.path.join(REPO_BASE_PATH, filename)\n", + " # print(path)\n", + " # print(len(file_errs[filename]))\n", + " with open(path, 'r') as f:\n", + " lines = f.readlines()\n", + "\n", + " for ei in file_errs[filename]:\n", + " line = lines[ei.line].rstrip()\n", + " if \"#\" in line:\n", + " print(f\"!!! Multiple comments in one line: {path}:{ei.line + 1}\")\n", + " lines[ei.line] = f\"{line}{ei.get_comment()}\\n\"\n", + "\n", + " with open(path, 'w') as f:\n", + " f.writelines(lines)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}