Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --exclude argument to CLI #261

Merged
merged 6 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Yamale (ya·ma·lē)
=================
[![Build Status](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml/badge.svg)](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml)
[![PyPI](https://img.shields.io/pypi/v/yamale.svg)](https://pypi.python.org/pypi/yamale)
[![downloads](https://static.pepy.tech/badge/yamale/month)](https://pepy.tech/project/yamale)
[![versions](https://img.shields.io/pypi/pyversions/yamale.svg)](https://github.com/yamale/yamale)
[![license](https://img.shields.io/github/license/23andMe/yamale.svg)](https://github.com/23andMe/Yamale/blob/master/LICENSE)

| :warning: Ensure that your schema definitions come from internal or trusted sources. Yamale does not protect against intentionally malicious schemas. |
|:------------|
Expand All @@ -11,8 +16,6 @@ A schema and validator for YAML.
What's YAML? See the current spec [here](http://www.yaml.org/spec/1.2/spec.html) and an introduction
to the syntax [here](https://github.com/Animosity/CraftIRC/wiki/Complete-idiot's-introduction-to-yaml).

[![Build Status](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml/badge.svg)](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml)
[![PyPI](https://img.shields.io/pypi/v/yamale.svg)](https://pypi.python.org/pypi/yamale)

Requirements
------------
Expand All @@ -23,8 +26,10 @@ Requirements
Install
-------
### pip
```bash
```
$ pip install yamale
# or to include ruamel.yaml as a dependency
$ pip install yamale[ruamel]
```

NOTE: Some platforms, e.g., Mac OS, may ship with only Python 2 and may not have pip installed.
Expand All @@ -49,25 +54,28 @@ looking up the directory tree until it finds one. If Yamale can not find a schem

Usage:

```bash
usage: yamale [-h] [-s SCHEMA] [-n CPU_NUM] [-p PARSER] [--no-strict] [PATH]
```
usage: yamale [-h] [-s SCHEMA] [-e PATTERN] [-p PARSER] [-n CPU_NUM] [-x] [-v] [-V] [PATH ...]

Validate yaml files.

positional arguments:
PATH folder to validate. Default is current directory.
PATH Paths to validate, either directories or files. Default is the current directory.

optional arguments:
options:
-h, --help show this help message and exit
-s SCHEMA, --schema SCHEMA
filename of schema. Default is schema.yaml.
-n CPU_NUM, --cpu-num CPU_NUM
number of CPUs to use. Default is 4.
-e PATTERN, --exclude PATTERN
Python regex used to exclude files from validation. Any substring match of a file's absolute path will be excluded. Uses
default Python3 regex. Option can be supplied multiple times.
-p PARSER, --parser PARSER
YAML library to load files. Choices are "ruamel" or
"pyyaml" (default).
--no-strict Disable strict mode, unexpected elements in the data
will be accepted.
YAML library to load files. Choices are "ruamel" or "pyyaml" (default).
-n CPU_NUM, --cpu-num CPU_NUM
Number of child processes to spawn for validation. Default is 4. 'auto' to use CPU count.
-x, --no-strict Disable strict mode, unexpected elements in the data will be accepted.
-v, --verbose show verbose information
-V, --version show program's version number and exit
```

### API
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
packages=find_packages(),
include_package_data=True,
install_requires=["pyyaml"],
extras_requires={"ruamel": ["ruamel.yaml"]},
extras_require={"ruamel": ["ruamel.yaml"]},
cblakkan marked this conversation as resolved.
Show resolved Hide resolved
python_requires=">=3.8",
entry_points={
"console_scripts": ["yamale=yamale.command_line:main"],
Expand Down
2 changes: 1 addition & 1 deletion yamale/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.3.0
6.0.0
91 changes: 68 additions & 23 deletions yamale/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import argparse
import glob
import os
from multiprocessing import Pool
import re
import multiprocessing
from .yamale_error import YamaleError
from .schema.validationresults import Result
from .version import __version__
Expand Down Expand Up @@ -64,31 +65,32 @@ def _find_schema(data_path, schema_name):
return _find_data_path_schema(data_path, schema_name)


def _validate_single(yaml_path, schema_name, parser, strict):
print("Validating %s..." % yaml_path)
def _validate_file(yaml_path, schema_name, parser, strict, should_exclude):
if should_exclude(yaml_path):
return
s = _find_schema(yaml_path, schema_name)
if not s:
raise ValueError("Invalid schema name for '{}' or schema not found.".format(schema_name))
_validate(s, yaml_path, parser, strict, True)


def _validate_dir(root, schema_name, cpus, parser, strict):
pool = Pool(processes=cpus)
def _validate_dir(root, schema_name, cpus, parser, strict, should_exclude):
pool = multiprocessing.Pool(processes=cpus)
res = []
error_messages = []
print("Finding yaml files...")
for root, dirs, files in os.walk(root):
for root, _, files in os.walk(root):
for f in files:
if (f.endswith(".yaml") or f.endswith(".yml")) and f != schema_name:
d = os.path.join(root, f)
s = _find_schema(d, schema_name)
if s:
res.append(pool.apply_async(_validate, (s, d, parser, strict, False)))
yaml_path = os.path.join(root, f)
if should_exclude(yaml_path):
continue
schema_path = _find_schema(yaml_path, schema_name)
if schema_path:
res.append(pool.apply_async(_validate, (schema_path, yaml_path, parser, strict, False)))
else:
print("No schema found for: %s" % d)
print(f"No schema found for: {yaml_path}")

print("Found %s yaml files." % len(res))
print("Validating...")
print(f"Found {len(res)} yaml files to validate...")
for r in res:
sub_results = r.get(timeout=300)
error_messages.extend([str(sub_result) for sub_result in sub_results if not sub_result.isValid()])
Expand All @@ -98,16 +100,34 @@ def _validate_dir(root, schema_name, cpus, parser, strict):
raise ValueError("\n----\n".join(set(error_messages)))


def _router(paths, schema_name, cpus, parser, strict=True):
def _router(paths, schema_name, cpus, parser, excludes=None, strict=True, verbose=False):
EXCLUDE_REGEXES = tuple(re.compile(e) for e in excludes) if excludes else tuple()

def should_exclude(yaml_path):
    """Tell whether yaml_path matches any of the compiled exclude patterns.

    Uses a substring search (pattern.search), so a pattern may match anywhere
    in the path. When verbose is set, a notice is printed for each skipped path.
    """
    for regex in EXCLUDE_REGEXES:
        if regex.search(yaml_path):
            if verbose:
                print("Skipping validation for %s due to exclude pattern" % yaml_path)
            return True
    return False

for path in paths:
path = os.path.abspath(path)
if os.path.isdir(path):
_validate_dir(path, schema_name, cpus, parser, strict)
abs_path = os.path.abspath(path)
if os.path.exists(abs_path):
print(f"Validating {path}...")
else:
raise ValueError(f"Path does not exist: {path}")

if os.path.isdir(abs_path):
_validate_dir(abs_path, schema_name, cpus, parser, strict, should_exclude)
else:
_validate_single(path, schema_name, parser, strict)
_validate_file(abs_path, schema_name, parser, strict, should_exclude)


def main():
def int_or_auto(num_cpu):
    """Convert the --cpu-num command line value into a process count.

    Args:
        num_cpu: The raw argument string, either "auto" or an integer literal.

    Returns:
        multiprocessing.cpu_count() for "auto", otherwise the parsed integer.

    Raises:
        argparse.ArgumentTypeError: If the value is neither "auto" nor an
            integer of at least 1.
    """
    if num_cpu == "auto":
        return multiprocessing.cpu_count()
    try:
        count = int(num_cpu)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected a positive integer or 'auto', got {num_cpu!r}") from None
    # A pool needs at least one worker; reject bad counts while parsing arguments
    # instead of letting the pool constructor fail later during validation.
    if count < 1:
        raise argparse.ArgumentTypeError(f"number of processes must be at least 1, got {count}")
    return count

parser = argparse.ArgumentParser(description="Validate yaml files.")
parser.add_argument(
"paths",
Expand All @@ -116,21 +136,46 @@ def main():
nargs="*",
help="Paths to validate, either directories or files. Default is the current directory.",
)
parser.add_argument("-V", "--version", action="version", version=__version__)
parser.add_argument("-s", "--schema", default="schema.yaml", help="filename of schema. Default is schema.yaml.")
parser.add_argument("-n", "--cpu-num", default=4, type=int, help="number of CPUs to use. Default is 4.")
parser.add_argument(
"-e",
"--exclude",
metavar="PATTERN",
action="append",
help="Python regex used to exclude files from validation. Any substring match of a file's absolute path will be excluded. Uses default Python3 regex. Option can be supplied multiple times.",
)
parser.add_argument(
"-p",
"--parser",
default="pyyaml",
help='YAML library to load files. Choices are "ruamel" or "pyyaml" (default).',
)
parser.add_argument(
"--no-strict", action="store_true", help="Disable strict mode, unexpected elements in the data will be accepted."
"-n",
"--cpu-num",
default=4,
type=int_or_auto,
help="Number of child processes to spawn for validation. Default is 4. 'auto' to use CPU count.",
)
parser.add_argument(
"-x",
"--no-strict",
action="store_true",
help="Disable strict mode, unexpected elements in the data will be accepted.",
)
parser.add_argument("-v", "--verbose", action="store_true", help="show verbose information")
parser.add_argument("-V", "--version", action="version", version=__version__)
args = parser.parse_args()
try:
_router(args.paths, args.schema, args.cpu_num, args.parser, not args.no_strict)
_router(
paths=args.paths,
schema_name=args.schema,
cpus=args.cpu_num,
parser=args.parser,
excludes=args.exclude,
strict=not args.no_strict,
verbose=args.verbose,
)
except (SyntaxError, NameError, TypeError, ValueError) as e:
print("Validation failed!\n%s" % str(e))
exit(1)
Expand Down
24 changes: 24 additions & 0 deletions yamale/tests/test_command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,19 @@ def test_multiple_paths_bad_yaml():
assert "map.bad: '12.5' is not a int." in e.value.message


def test_excludes():
    """A file matching an exclude pattern is skipped while the others are still validated.

    bad.yaml is expected to fail validation when it is not excluded, so this call
    only succeeds if the exclude pattern is applied to it.
    """
    command_line._router(
        paths=[
            "yamale/tests/command_line_fixtures/yamls/good.yaml",
            "yamale/tests/command_line_fixtures/yamls/bad.yaml",
        ],
        schema_name="schema.yaml",
        # Must be a list of patterns: a bare string is iterated character by character,
        # and the resulting "." pattern would exclude every path, good.yaml included.
        excludes=[r"bad\.yaml$"],
        cpus=1,
        parser="PyYAML",
    )


@pytest.mark.parametrize("parser", parsers)
def test_good_relative_yaml(parser):
command_line._router(
Expand Down Expand Up @@ -126,6 +139,17 @@ def test_bad_dir():
command_line._router("yamale/tests/command_line_fixtures/yamls", "schema.yaml", 4, "PyYAML")


def test_bad_path_raises():
    """A path that does not exist on disk is reported with a ValueError."""
    with pytest.raises(ValueError) as e:
        command_line._router(
            paths=["yamale/tests/command_line_fixtures/yamls/a path that does not exist.yaml"],
            schema_name="schema.yaml",
            cpus=1,
            parser="PyYAML",
        )
    # Check the message of the raised exception itself (e.value); e is pytest's
    # ExceptionInfo wrapper, whose str() is not the exception message.
    assert "Path does not exist" in str(e.value)


def test_bad_strict():
with pytest.raises(ValueError) as e:
command_line._router(
Expand Down
Loading