equinor · devProdigy · Dec 9, 2020 · Dec 9, 2020 · Dec 9, 2020 · Dec 9, 2020
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -5,12 +5,67 @@ on:
   pull_request:
 
 jobs:
+  code_checks:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        code-check: [black, flakehell]
+        python-version: [3.7]
+    steps:
+      - uses: actions/checkout@v1
+
+      - uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: 'x64'
+
+      - uses: actions/cache@v2
+        if: startsWith(runner.os, 'Linux')
+        with:
+          path: ${{ env.pythonLocation }}
+          key: ${{ env.pythonLocation }}-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/*requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.python-version }}-pip-
+
+      - name: Extract branch name  # works for both 'push' and 'pull_request' events
+        shell: bash
+        run: echo "##[set-output name=branch;]$( if [ ${{ github.event_name }} = push ]; then echo ${GITHUB_REF#refs/heads/}; else echo ${{ github.event.pull_request.head.ref }}; fi )"
+        id: extract_branch
+
+      - name: Install test requirements
+        run: |
+          pip install --upgrade pip
+          pip install --upgrade pip-tools
+          pip install -r requirements/test_requirements.txt
+
+      - name: Run code quality checks
+        run: |
+          FILES_TO_CHECK=$(git diff origin/master..origin/${{ steps.extract_branch.outputs.branch }} --name-only --diff-filter=ACM '*py')
+          echo "> Changed Python files: ${FILES_TO_CHECK}"
+
+          if [ -z "${FILES_TO_CHECK}" ]
+          then
+            echo "> No Python files to check, exiting."
+            exit 0
+          fi
+
+          if [ ${{ matrix.code-check }} = black ]
+          then
+            echo "> Starting Black code-formatting check for files: ${FILES_TO_CHECK}"
+            black --check ${FILES_TO_CHECK}
+          else
+            echo "> Starting Flakehell code-quality check for diff in files: ${FILES_TO_CHECK}"
+            git diff origin/master..origin/${{ steps.extract_branch.outputs.branch }} -U0 --diff-filter=ACM '*.py' | flakehell lint --diff
+          fi
+
   test:
     runs-on: ubuntu-latest
+    needs: code_checks
     strategy:
       matrix:
         os: [ubuntu-latest]
-        component: [builder, cli, client, machine, reporters, serializer, server, util, workflow, formatting, allelse]
+        component: [builder, cli, client, machine, reporters, serializer, server, util, workflow, allelse]
         python-version: [3.7]
     steps:
       - uses: actions/checkout@v1
@@ -20,13 +75,13 @@ jobs:
           python-version: ${{ matrix.python-version }}
           architecture: 'x64'
 
-      - uses: actions/cache@v1
+      - uses: actions/cache@v2
         if: startsWith(runner.os, 'Linux')
         with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-${{ matrix.python-version }}-pip-new-${{ hashFiles('requirements/*requirements.txt') }}
+          path: ${{ env.pythonLocation }}
+          key: ${{ env.pythonLocation }}-${{ runner.os }}-${{ matrix.python-version }}-env-${{ hashFiles('**/*requirements.txt') }}
           restore-keys: |
-            ${{ runner.os }}-${{ matrix.python-version }}-pip-new-
+            ${{ env.pythonLocation }}-${{ runner.os }}-${{ matrix.python-version }}-env-
 
       - name: Install
         run: |
@@ -69,6 +124,7 @@ jobs:
 
   build-docs:
     runs-on: ubuntu-latest
+    needs: code_checks
     steps:
       - uses: actions/checkout@v1
 
@@ -77,6 +133,13 @@ jobs:
           python-version: '3.7' # Version range or exact version of a Python version to use, using semvers version range syntax.
           architecture: 'x64'
 
+      - uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-${{ matrix.python-version }}-pip-new-${{ hashFiles('**/*requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.python-version }}-pip-new-
+
       - name: Install deps
         run: |
           pip install --upgrade pip

diff --git a/Makefile b/Makefile
@@ -4,6 +4,8 @@ MODEL_SERVER_IMG_NAME  := gordo-model-server
 CLIENT_IMG_NAME := gordo-client
 WORKFLOW_GENERATOR_IMG_NAME := gordo-deploy
 
+.SILENT: code-quality-locally black-check flakehell-check
+
 base:
 	docker build . -f Dockerfile -t $(BASE_IMG_NAME)
 
@@ -95,6 +97,31 @@ sdist:
 
 images: model-builder model-server client
 
+code-quality-locally:
+	@echo "** Make sure that your branch is up to date with origin/master (not to have extra files to check)"
+	make black-check; make flakehell-check  ## run code quality check on changed files in the current branch
+
+black-check:  ## run black code formatting check on changed files in the current branch
+	@$(eval FILES_TO_CHECK=$(shell git diff origin/master --name-only --diff-filter=ACM '*.py'))
+
+	if [ -z "${FILES_TO_CHECK}" ]; then \
+		echo "> No Python files to check."; \
+	else \
+		echo "> Starting Black code-formatting check for files: ${FILES_TO_CHECK}"; \
+		black --diff ${FILES_TO_CHECK}; \
+	fi
+
+flakehell-check:  ## run flake8 and its plugins code checks on changes in the current branch
+	@$(eval FILES_TO_CHECK=$(shell git diff origin/master --name-only --diff-filter=ACM '*.py'))
+
+	if [ -z "${FILES_TO_CHECK}" ]; then \
+		echo "> No Python files to check."; \
+	else \
+		echo "> Starting Flakehell code-quality check for diff in files: ${FILES_TO_CHECK}"; \
+		git diff origin/master -U0 --diff-filter=ACM '*.py' | flakehell lint --diff; \
+		echo "> 'Flakehell' finished."; \
+	fi
+
 test:
 	python setup.py test
 
@@ -104,6 +131,20 @@ testall:
 docs:
 	cd ./docs && $(MAKE) html
 
+compose_requirements:  ## run pip-compile for requirements.in and test_requirements.in
+	pip install --upgrade pip
+	pip install --upgrade pip-tools
+	# to auto update requirements -> add "--upgrade" param to the lines below
+	cd requirements && \
+	pip-compile --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in && \
+	pip-compile --output-file=test_requirements.txt test_requirements.in
+
+install_app_requirements:  ## install requirements for app and tests
+	pip install --upgrade pip
+	pip install --upgrade pip-tools
+	pip install -r requirements/full_requirements.txt
+	pip install -r requirements/test_requirements.txt
+
 all: test images push-dev-images
 
 .PHONY: model-builder model-server client watchman push-server push-builder push-client push-dev-images push-prod-images images test all docs workflow-generator push-workflow-generator base
diff --git a/README.md b/README.md
@@ -37,6 +37,8 @@
         * [Tests system requirements](#Tests-system-requirements)
         * [Run tests](#Run-tests)
         * [How to run tests in debug mode](#How-to-run-tests-in-debug-mode)
+    * [Code quality checks](#Code-quality-checks)
+        * [Run code quality checks locally](#Run-code-quality-checks-locally)
 
 ## About
 
@@ -71,10 +73,7 @@ This section will explain how to start development of Gordo.
 ```shell script
 # create and activate virtualenv. Note: you should use python3.7 (project's tensorflow version is not compatible with python3.8)
 # then:
-pip install --upgrade pip
-pip install --upgrade pip-tools
-pip install -r requirements/full_requirements.txt
-pip install -r requirements/test_requirements.txt
+make install_app_requirements
 ```
 
 ### How to run tests locally
@@ -97,3 +96,16 @@ python3.7 -m pytest ...
 #### How to run tests in debug mode
 Note: this example is for Pycharm IDE to use `breakpoints` in the code of the tests.  
 On the configuration setup for test running add to `Additional arguments:` in `pytest` section following string: `--ignore benchmarks --cov-report= --no-cov `
+
+### Code quality checks
+This repo uses [black](https://black.readthedocs.io/en/stable/) and [flakehell](https://github.com/life4/flakehell/blob/master/README.md) (with plugins) for code formatting and analys.  
+Code quality checks on the CI automatically. Checks run only on *CHANGED* code (not on all files in the repo).  
+
+#### Run code quality checks locally
+Before pushing code to the "remote" - check if your changes are good:
+```shell
+# this will compare changes in your local branch with origin/master.
+# note that origin/master should be up to date and merged into current branch. 
+make code-quality-locally
+```
+In plans to add pre-commit hooks not ot do it manually.
diff --git a/gordo/builder/build_model.py b/gordo/builder/build_model.py
@@ -145,7 +145,7 @@ def build(
                     model=model, machine=machine, output_dir=output_dir  # type: ignore
                 )
                 logger.info(f"Built model, and deposited at {self.cached_model_path}")
-                logger.info(f"Writing model-location to model registry")
+                logger.info("Writing model-location to model registry")
                 disk_registry.write_key(  # type: ignore
                     model_register_dir, self.cache_key, self.cached_model_path
                 )

diff --git a/gordo/cli/cli.py b/gordo/cli/cli.py
@@ -160,7 +160,7 @@ def build(
 
         # Convert the config into a pipeline, and back into definition to ensure
         # all default parameters are part of the config.
-        logger.debug(f"Ensuring the passed model config is fully expanded.")
+        logger.debug("Ensuring the passed model config is fully expanded.")
         machine.model = serializer.into_definition(
             serializer.from_definition(machine.model)
         )

diff --git a/gordo/cli/client.py b/gordo/cli/client.py
@@ -227,7 +227,7 @@ def download_model(ctx: click.Context, output_dir: str, target: typing.List[str]
             f"Writing model '{model_name}' to directory: '{model_out_dir}'...", nl=False
         )
         serializer.dump(model, model_out_dir)
-        click.secho(f"done")
+        click.secho("done")
 
     click.secho(f"Wrote all models to directory: {output_dir}", fg="green")
 

diff --git a/gordo/cli/custom_types.py b/gordo/cli/custom_types.py
@@ -26,7 +26,7 @@ def convert(self, value, param, ctx):
             kwargs = yaml.safe_load(value)
 
         if "type" not in kwargs:
-            self.fail(f"Cannot create DataProvider without 'type' key defined")
+            self.fail("Cannot create DataProvider without 'type' key defined")
 
         kind = kwargs.pop("type")
 

diff --git a/gordo/machine/validators.py b/gordo/machine/validators.py
@@ -102,7 +102,7 @@ def __set__(self, instance, value):
         if value is not None and not any(
             isinstance(value, Obj) for Obj in (dict, Metadata)
         ):
-            raise ValueError(f"Can either be None or an instance of dict or Metadata")
+            raise ValueError("Can either be None or an instance of dict or Metadata")
         instance.__dict__[self.name] = value
 
 
@@ -118,7 +118,7 @@ def __set__(self, instance, value):
 
         if not isinstance(value, GordoBaseDataProvider):
             raise TypeError(
-                f"Expected value to be an instance of GordoBaseDataProvider, "
+                "Expected value to be an instance of GordoBaseDataProvider, "
                 f"found {value} "
             )
         instance.__dict__[self.name] = value
@@ -132,7 +132,7 @@ class ValidMachineRuntime(BaseDescriptor):
 
     def __set__(self, instance, value):
         if not isinstance(value, dict):
-            raise ValueError(f"Runtime must be an instance of dict")
+            raise ValueError("Runtime must be an instance of dict")
         value = self._verify_reporters(value)
         value = fix_runtime(value)
         instance.__dict__[self.name] = value
@@ -215,7 +215,7 @@ def fix_resource_limits(resources: dict) -> dict:
         logger.warning(
             f"Memory limit {limits_memory} can not be smaller than memory "
             f"request {request_memory}, increasing memory limit to be equal"
-            f" to request. "
+            " to request. "
         )
         limits["memory"] = request_memory
     if (
@@ -264,7 +264,7 @@ def __set__(self, instance, value):
             or not isinstance(value, list)
             or not any(isinstance(value[0], inst) for inst in (str, dict, SensorTag))
         ):
-            raise ValueError(f"Requires setting a non-empty list of strings")
+            raise ValueError("Requires setting a non-empty list of strings")
         instance.__dict__[self.name] = value
 
 

diff --git a/gordo/reporters/mlflow.py b/gordo/reporters/mlflow.py
@@ -473,7 +473,7 @@ def log_machine(mlflow_client: MlflowClient, run_id: str, machine: Machine):
     # Send configs as JSON artifacts
     try:
         with tempfile.TemporaryDirectory(dir="./") as tmp_dir:
-            fp = os.path.join(tmp_dir, f"metadata.json")
+            fp = os.path.join(tmp_dir, "metadata.json")
             with open(fp, "w") as fh:
                 json.dump(machine.to_dict(), fh, cls=MachineEncoder)
             mlflow_client.log_artifacts(run_id=run_id, local_dir=tmp_dir)

diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,52 @@
+[tool.flakehell]
+format = "grouped"
+max_line_length = 120
+show_source = true
+no-isort-config = true
+max-line-complexity = 15
+
+exclude = [".cache", ".git", "__pycache__", "old", "build", "dist", "junk"]
+
+[tool.flakehell.plugins]
+flake8-broken-line = ["+*"]
+flake8-bugbear = ["+*"]
+flake8-builtins = ["+*"]
+# - C812 exclude trailing comma for one line expression (black conflicts)
+flake8-commas   = ["+*", "-C812"]
+flake8-comprehensions = ["+*"]
+flake8-darglint = ["+*"]
+flake8-debugger = ["+*"]
+flake8-docstrings = ["+*"]
+flake8-eradicate = ["+*"]
+flake8-isort = ["+*"]
+flake8-string-format = ["+*"]
+flake8-mutable = ["+*"]
+mccabe = ["+*"]
+pep8-naming = ["+*"]
+pycodestyle = ["+*"]
+pyflakes = ["+*"]
+
+[tool.black]
+target-version = ['py37']
+include = '\.pyi?$'
+exclude = '''
+(
+  /(
+      \.eggs         # exclude a few common directories in the
+    | \.git          # root of the project
+    | \.hg
+    | \.mypy_cache
+    | \.tox
+    | \.venv
+    | \.cache
+    | __pycache__
+    | _build
+    | buck-out
+    | build
+    | dist
+  )/
+)
+'''
+
+[tool.isort]
+skip = [".cache", ".venv"]
diff --git a/requirements/test_requirements.in b/requirements/test_requirements.in
@@ -18,3 +18,20 @@ nbconvert~=5.6
 urllib3>=1.24.2,<2.0
 pytest-benchmark~=3.2
 mock~=3.0
+flakehell~=0.7.0
+flake8-broken-line~=0.3
+flake8-bugbear~=20.11
+flake8-builtins~=1.5
+flake8-commas~=2.0
+flake8-comprehensions~=3.3
+darglint~=1.5
+flake8-debugger~=4.0
+flake8-docstrings~=1.5
+flake8-eradicate~=1.0
+flake8-isort~=4.0
+flake8-mutable~=1.2
+flake8-string-format~=0.3
+mccabe~=0.6
+pep8-naming~=0.11.1
+pycodestyle~=2.6
+pyflakes~=2.2