Skip to content

Commit

Permalink
Validator: Re-enable yamale validator. (#124)
Browse files Browse the repository at this point in the history
* Re-enable yamale validator.
* Update relaxed schema and use it for tributaries by using strict_titles arg to control strict schema.
* Allow bare AWS within code-style XML tags.
* Write unit tests for yamale validation.
  • Loading branch information
Laren-AWS authored Dec 4, 2024
1 parent 7425df6 commit 58ecee6
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 62 deletions.
55 changes: 19 additions & 36 deletions aws_doc_sdk_examples_tools/config/example_schema.yaml
Original file line number Diff line number Diff line change
@@ -1,60 +1,43 @@
# Yamale Schema for example metadata, which is all .yaml files in the metadata folder
# with a _metadata.yaml suffix.

map(include('example'), key=example_id())
map(include('example'), key=str())
---
# An example blocks all the languages together for a single example in a tab list. It is a navigable page on the code examples library. It is the top level "unit" of SoS content. This metadata is merged from tributaries with aws-doc-sdk-examples.
example:
# Human readable title. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging.
title: str(upper_start=True, no_end_punc=True, required=False)
# Used in the TOC. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging.
title_abbrev: str(upper_start=True, no_end_punc=True, required=False)
# String label categories. Categories inferred by cross-service with multiple services, and can be whatever else it wants. Controls where in the TOC it appears. Overwritten by aws-doc-sdk-example when merging.
category: str(upper_start=True, no_end_punc=True, required=False)
# Link to additional topic places. Overwritten by aws-doc-sdk-example when merging.
guide_topic: include('guide_topic', required=False) # TODO Make this a list or a single.
# TODO how to add a language here and require it in sdks_schema. TODO: Keys merged by aws-doc-sdk-example when merging.
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
# TODO document service_main and services. Not to be used by tributaries. Part of Cross Service.
# List of services used by the examples. Lines up with those in services.yaml. Overwritten by aws-doc-sdk-example when merging.
title: str(required=False, upper_start=True, no_end_punc=True)
title_abbrev: str(required=False, upper_start=True, no_end_punc=True)
synopsis: str(required=False)
synopsis_list: list(str(upper_start=True), required=False)
category: str(required=False, upper_start=True, no_end_punc=True)
guide_topic: include('guide_topic', required=False)
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
service_main: service_name(required=False)
services: map(map(key=str(), required=False), key=service_name())
synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True, required=False)
synopsis_list: list(str(upper_start=True, end_punc=True), required=False)
services: map(key=service_name())

# Used for creating links in the block.
guide_topic:
title: str(upper_start=True, no_end_punc=True)
url: include('doc_url', required=False)

# Language Version configuration. Likely just the single list item.
language:
versions: list(include('version'))

# Example for a single Language.
# Per-language excerpts for the example. Languages and SDK versions are defined in .doc_gen/metadata/sdk_metadata.yaml
version:
sdk_version: int(min=1)
# Additional ZonBook XML to include in the tab for this sample.
block_content: block_content(required=False)
# The specific code samples to include in the example.
excerpts: list(include('excerpt'), required=False)
# Link to the source code for this example. TODO rename.
sdk_version: any(int(min=1), str(check_aws=False))
github: str(required=False)
github_name: str(required=False)
github_note_at_bottom: bool(required=False)
add_services: map(key=service_name(), required=False)
# Deprecated. Replace with guide_topic list.
sdkguide: include('doc_url', required=False)
# Link to additional topic places. TODO: Overwritten by aws-doc-sdk-example when merging.
more_info: list(include('guide_topic', required=False))
excerpts: list(include('excerpt'), required=False)
block_content: block_content(required=False)
add_services: map(key=service_name(), required=False)

# One language example can have several excerpts, each having a description block and one or more snippets.
# An excerpt may have either snippet_files OR snippet_tags, but not both.
# The references to code content that will be included in the example's content.
excerpt:
description: str(required=False, upper_start=True, end_punc=True)
# A path within the repo to extract the entire file as a snippet.
snippet_files: list(str(), required=False)
# Tags embedded in source files to extract as snippets.
description: str(required=False)
genai: enum('none', 'some', 'most', 'all', required=False)
snippet_tags: list(str(), required=False)
snippet_files: list(str(), required=False)

service_slug_regex: regex('^[-a-z0-9]+$', name='service slug')
doc_url: regex('^(?!https://docs.aws.amazon.com/).+', name="relative documentation URL")
7 changes: 4 additions & 3 deletions aws_doc_sdk_examples_tools/config/example_strict_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
map(include('example'), key=example_id())
---
example:
title: str(upper_start=True, no_end_punc=True)
title_abbrev: str(upper_start=True, no_end_punc=True)
title: str(required=False, upper_start=True, no_end_punc=True)
title_abbrev: str(required=False, upper_start=True, no_end_punc=True)
synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True)
synopsis_list: list(str(upper_start=True, end_punc=True), required=False)
category: str(required=False, upper_start=True, no_end_punc=True)
guide_topic: include('guide_topic', required=False)
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
service_main: service_name(required=False)
services: map(map(key=str(), required=False), key=service_name())

Expand All @@ -34,6 +34,7 @@ version:
# The references to code content that will be included in the example's content.
excerpt:
description: str(required=False, upper_start=True, end_punc=True)
genai: enum('none', 'some', 'most', 'all', required=False)
snippet_tags: list(str(), required=False)
snippet_files: list(str(), required=False)

Expand Down
4 changes: 2 additions & 2 deletions aws_doc_sdk_examples_tools/config/sdks_schema.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Yamale Schema for SDK metadata, which is the sdks.yaml file in the metadata folder.

map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
---
sdk:
property: include('syntax_enum')
Expand Down Expand Up @@ -30,6 +30,6 @@ title_override:
title: str()
title_abbrev: str()

syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift')
syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'powershell', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift')
entity_regex: regex('^&[-_a-zA-Z0-9]+;$', name='valid entity')
entity_with_version_regex: regex('^&[-_a-zA-Z0-9]+;', name='valid entity with version')
6 changes: 3 additions & 3 deletions aws_doc_sdk_examples_tools/config/services_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
map(include('service'), key=regex('^[-a-z0-9]+$', name='service slug'))
---
service:
long: include('long_entity_regex')
short: include('entity_regex')
long: str()
short: str()
sort: regex('^[^&]\\w', name='non-entity')
chapter_override: include('chapter_override', required=False)
expanded:
Expand All @@ -16,7 +16,7 @@ service:
url: include('doc_url')
api_client: service_name(required=False)
api_ref: include('doc_url')
version: service_version()
version: str()
caveat: str(required=False, upper_start=True, end_punc=True)
bundle: service_name(required=False)
tags: map(key=enum('product_categories'))
Expand Down
2 changes: 1 addition & 1 deletion aws_doc_sdk_examples_tools/doc_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def validate(self):
service.validate(self.errors)
for example in self.examples.values():
example.validate(self.errors, self.root)
validate_metadata(self.root, self.errors)
validate_metadata(self.root, self.validation.strict_titles, self.errors)
validate_no_duplicate_api_examples(self.examples.values(), self.errors)
validate_snippets(
[*self.examples.values()],
Expand Down
72 changes: 55 additions & 17 deletions aws_doc_sdk_examples_tools/metadata_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import datetime
import os
import re
import xml.etree.ElementTree as xml_tree
import yaml
from dataclasses import dataclass, field
from pathlib import Path
Expand Down Expand Up @@ -122,8 +123,7 @@ def _is_valid(self, value: str):
return True
valid = True
if self.check_aws:
# All occurrences of AWS must be entities or within a word.
valid = len(re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", value)) == 0
valid = self._validate_aws_entity_usage(value)
if not valid:
self.last_err = 'valid string: it contains a non-entity usage of "AWS"'
if valid and self.upper_start:
Expand All @@ -135,27 +135,51 @@ def _is_valid(self, value: str):
if not valid:
self.last_err = "valid string: it must start with a lowercase letter"
if valid and self.end_punc:
valid = value[-1] in "!.?"
valid = value.rstrip()[-1] in "!.?"
if not valid:
self.last_err = "valid sentence or phrase: it must end with punctuation"
if valid and self.no_end_punc:
valid = value[-1] not in "!.?"
valid = value.rstrip()[-1] not in "!.?"
if not valid:
self.last_err = "valid string: it must not end with punctuation"
if valid and self.end_punc_or_colon:
valid = value[-1] in "!.?:"
valid = value.rstrip()[-1] in "!.?:"
if not valid:
self.last_err = (
"valid sentence or phrase: it must end with punctuation or a colon"
)
if valid and self.end_punc_or_semicolon:
valid = value[-1] in "!.?;"
valid = value.rstrip()[-1] in "!.?;"
if not valid:
self.last_err = "valid sentence or phrase: it must end with punctuation or a semicolon"
if valid:
valid = super()._is_valid(value)
return valid

@staticmethod
def _validate_aws_entity_usage(value: str) -> bool:
"""
All occurrences of AWS must be entities or within a word or within a programlisting or code or noloc block.
Count all bare AWS occurrences within accepted XML tags.
Count all bare AWS occurrences overall.
If these counts differ, there's an invalid usage.
"""
xval = value.replace("&", "&amp;")
xtree = xml_tree.fromstring(f"<fake><para>{xval}</para></fake>")
blocks = (
xtree.findall(".//programlisting")
+ xtree.findall(".//code")
+ xtree.findall(".//noloc")
)
aws_in_blocks = 0
for element in blocks:
aws_in_blocks += len(
re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", str(element.text))
)
aws_everywhere = len(re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", value))
return aws_everywhere == aws_in_blocks


@dataclass
class ValidateYamaleError(MetadataParseError):
Expand All @@ -169,6 +193,7 @@ def validate_files(
schema_name: Path,
meta_names: Iterable[Path],
validators: Dict[str, Validator],
strict: bool,
errors: MetadataErrors,
):
"""Iterate a list of files and validate each one against a schema."""
Expand All @@ -177,14 +202,16 @@ def validate_files(
for meta_name in meta_names:
try:
data = yamale.make_data(meta_name)
yamale.validate(schema, data)
yamale.validate(schema, data, strict=strict)
print(f"{meta_name.resolve()} validation success! 👍")
except YamaleError as e:
errors.append(ValidateYamaleError(file=meta_name, yamale_error=e))
return errors


def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErrors:
def validate_metadata(
doc_gen_root: Path, strict: bool, errors: MetadataErrors
) -> MetadataErrors:
config = Path(__file__).parent / "config"
with open(config / "sdks.yaml") as sdks_file:
sdks_yaml: Dict[str, Any] = yaml.safe_load(sdks_file)
Expand All @@ -206,20 +233,28 @@ def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErr
validators[BlockContent.tag] = BlockContent
validators[String.tag] = StringExtension

schema_root = Path(__file__).parent / "config"
config_root = Path(__file__).parent / "config"
if strict:
example_schema = "example_strict_schema.yaml"
else:
example_schema = "example_schema.yaml"

to_validate = [
# (schema, metadata_glob)
("sdks_schema.yaml", "sdks.yaml"),
("services_schema.yaml", "services.yaml"),
# TODO: Switch between strict schema for aws-doc-sdk-examples and loose schema for tributaries
("example_strict_schema.yaml", "*_metadata.yaml"),
(config_root / "sdks_schema.yaml", config_root, "sdks.yaml"),
(config_root / "services_schema.yaml", config_root, "services.yaml"),
(
config_root / example_schema,
doc_gen_root / ".doc_gen" / "metadata",
"*_metadata.yaml",
),
]
for schema, metadata in to_validate:
for schema, meta_root, metadata in to_validate:
validate_files(
schema_root / schema,
(doc_gen_root / "metadata").glob(metadata),
schema,
meta_root.glob(metadata),
validators,
strict,
errors,
)

Expand All @@ -234,9 +269,12 @@ def main():
help="The folder that contains schema and metadata files.",
required=False,
)
parser.add_argument(
"--strict", default=True, help="Use strict schema.", required=False
)
args = parser.parse_args()

errors = validate_metadata(Path(args.doc_gen), MetadataErrors())
errors = validate_metadata(Path(args.doc_gen), args.strict, MetadataErrors())

if len(errors) == 0:
print("Validation succeeded! 👍👍👍")
Expand Down
31 changes: 31 additions & 0 deletions aws_doc_sdk_examples_tools/metadata_validator_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from pathlib import Path

import pytest

from .metadata_errors import MetadataErrors
from .metadata_validator import validate_metadata


@pytest.mark.parametrize("strict", [True, False])
def test_aws_entity_usage(strict):
    """
    Validate the doc_gen_test fixture under both schemas and check which
    strings containing AWS show up in the collected errors.
    """
    fixture_root = Path(__file__).parent / "test_resources/doc_gen_test"
    errors = MetadataErrors()
    validate_metadata(Path(fixture_root), strict, errors)
    e_str = str(errors)

    # Strings with a bare AWS must be reported.
    reported = (
        "Title has AWS",
        "Title Abbrev has AWS",
        "Synopsis has AWS",
        "Synopsis list has AWS",
        "Description has AWS",
    )
    # Strings that use the entity, or keep AWS inside a code-style tag, must not be reported.
    not_reported = (
        "Title has &AWS;",
        "Title Abbrev has &AWS;",
        "Synopsis programlisting has AWS",
        "Synopsis list code has <code>AWS",
        "Description programlisting has AWS",
    )

    for fragment in reported:
        assert fragment in e_str
    for fragment in not_reported:
        assert fragment not in e_str
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Negative case: title, title_abbrev, synopsis, synopsis_list, and the excerpt
# description each contain a bare "AWS" (not the &AWS; entity, not inside a code-style tag).
sns_EntityFailures:
title: Title has AWS using an &AWS; SDK
title_abbrev: Title Abbrev has AWS in it
synopsis: "Synopsis has AWS in it."
synopsis_list:
- "Synopsis list has AWS in it."
category: Cat
languages:
Java:
versions:
- sdk_version: 1
github: java/example_code/svc_EntityFailures
sdkguide:
excerpts:
- description: Description has AWS in it.
snippet_tags:
- java.example_code.svc_EntityFailures.Test
services:
sns:
# Positive case: every "AWS" below is either written as the &AWS; entity or
# sits inside a <programlisting> or <code> element.
sns_EntitySuccesses:
title: Title has &AWS; using an &AWS; SDK
title_abbrev: Title Abbrev has &AWS; in it
synopsis: "this <programlisting>Synopsis programlisting has AWS in it.</programlisting>."
synopsis_list:
- "Synopsis list code has <code>AWS</code> in it."
category: Cat
languages:
Java:
versions:
- sdk_version: 1
github: java/example_code/svc_EntityFailures
sdkguide:
excerpts:
- description: This <emphasis><programlisting>Description programlisting has AWS in it</programlisting></emphasis> doesn't it.
snippet_tags:
- java.example_code.svc_EntityFailures.Test
services:
sns:

0 comments on commit 58ecee6

Please sign in to comment.