From 58ecee61f181dc2f0ffd9b8f5924cb61e11ae552 Mon Sep 17 00:00:00 2001 From: Laren-AWS <57545972+Laren-AWS@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:51:10 -0800 Subject: [PATCH] Validator: Re-enable yamale validator. (#124) * Re-enable yamale validator. * Update relaxed schema and use it for tributaries by using strict_titles arg to control strict schema. * Allow bare AWS within code-style XML tags. * Write unit tests for yamale validation. --- .../config/example_schema.yaml | 55 +++++--------- .../config/example_strict_schema.yaml | 7 +- .../config/sdks_schema.yaml | 4 +- .../config/services_schema.yaml | 6 +- aws_doc_sdk_examples_tools/doc_gen.py | 2 +- .../metadata_validator.py | 72 ++++++++++++++----- .../metadata_validator_test.py | 31 ++++++++ .../metadata/aws_entity_metadata.yaml | 38 ++++++++++ 8 files changed, 153 insertions(+), 62 deletions(-) create mode 100644 aws_doc_sdk_examples_tools/metadata_validator_test.py create mode 100644 aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml diff --git a/aws_doc_sdk_examples_tools/config/example_schema.yaml b/aws_doc_sdk_examples_tools/config/example_schema.yaml index df55c42..e9e9099 100644 --- a/aws_doc_sdk_examples_tools/config/example_schema.yaml +++ b/aws_doc_sdk_examples_tools/config/example_schema.yaml @@ -1,60 +1,43 @@ # Yamale Schema for example metadata, which is all .yaml files in the metadata folder # with a _metadata.yaml suffix. -map(include('example'), key=example_id()) +map(include('example'), key=str()) --- -# An example blocks all the languages together for a single example in a tab list. It is a navigable page on the code examples library. It is the top level "unit" of SoS content. This metadata is merged from tributaries with aws-doc-sdk-examples. example: - # Human readable title. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging. - title: str(upper_start=True, no_end_punc=True, required=False) - # Used in the TOC. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging. - title_abbrev: str(upper_start=True, no_end_punc=True, required=False) - # String label categories. Categories inferred by cross-service with multiple services, and can be whatever else it wants. Controls where in the TOC it appears. Overwritten by aws-doc-sdk-example when merging. - category: str(upper_start=True, no_end_punc=True, required=False) - # Link to additional topic places. Overwritten by aws-doc-sdk-example when merging. - guide_topic: include('guide_topic', required=False) # TODO Make this a list or a single. - # TODO how to add a language here and require it in sdks_schema. TODO: Keys merged by aws-doc-sdk-example when merging. - languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift')) - # TODO document service_main and services. Not to be used by tributaries. Part of Cross Service. - # List of services used by the examples. Lines up with those in services.yaml. Overwritten by aws-doc-sdk-example when merging. + title: str(required=False, upper_start=True, no_end_punc=True) + title_abbrev: str(required=False, upper_start=True, no_end_punc=True) + synopsis: str(required=False) + synopsis_list: list(str(upper_start=True), required=False) + category: str(required=False, upper_start=True, no_end_punc=True) + guide_topic: include('guide_topic', required=False) + languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift')) service_main: service_name(required=False) - services: map(map(key=str(), required=False), key=service_name()) - synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True, required=False) - synopsis_list: list(str(upper_start=True, end_punc=True), required=False) + services: map(key=service_name()) -# Used for creating links in the block. guide_topic: title: str(upper_start=True, no_end_punc=True) url: include('doc_url', required=False) -# Language Version configuration. Likely just the single list item. language: versions: list(include('version')) -# Example for a single Language. +# Per-language excerpts for the example. Languages and SDK versions are defined in .doc_gen/metadata/sdk_metadata.yaml version: - sdk_version: int(min=1) - # Additional ZonBook XML to include in the tab for this sample. - block_content: block_content(required=False) - # The specific code samples to include in the example. - excerpts: list(include('excerpt'), required=False) - # Link to the source code for this example. TODO rename. + sdk_version: any(int(min=1), str(check_aws=False)) github: str(required=False) + github_name: str(required=False) github_note_at_bottom: bool(required=False) - add_services: map(key=service_name(), required=False) - # Deprecated. Replace with guide_topic list. sdkguide: include('doc_url', required=False) - # Link to additional topic places. TODO: Overwritten by aws-doc-sdk-example when merging. - more_info: list(include('guide_topic', required=False)) + excerpts: list(include('excerpt'), required=False) + block_content: block_content(required=False) + add_services: map(key=service_name(), required=False) -# One language example can have several excerpts, each having a description block and one or more snippets. -# An excerpt may have either snippet_files OR snippet_tags, but not both. +# The references to code content that will be included in the example's content. excerpt: - description: str(required=False, upper_start=True, end_punc=True) - # A path within the repo to extract the entire file as a snippet. - snippet_files: list(str(), required=False) - # Tags embedded in source files to extract as snippets. + description: str(required=False) + genai: enum('none', 'some', 'most', 'all', required=False) snippet_tags: list(str(), required=False) + snippet_files: list(str(), required=False) service_slug_regex: regex('^[-a-z0-9]+$', name='service slug') doc_url: regex('^(?!https://docs.aws.amazon.com/).+', name="relative documentation URL") diff --git a/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml b/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml index 5acb179..629a058 100644 --- a/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml +++ b/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml @@ -4,13 +4,13 @@ map(include('example'), key=example_id()) --- example: - title: str(upper_start=True, no_end_punc=True) - title_abbrev: str(upper_start=True, no_end_punc=True) + title: str(required=False, upper_start=True, no_end_punc=True) + title_abbrev: str(required=False, upper_start=True, no_end_punc=True) synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True) synopsis_list: list(str(upper_start=True, end_punc=True), required=False) category: str(required=False, upper_start=True, no_end_punc=True) guide_topic: include('guide_topic', required=False) - languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift')) + languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift')) service_main: service_name(required=False) services: map(map(key=str(), required=False), key=service_name()) @@ -34,6 +34,7 @@ version: # The references to code content that will be included in the example's content. excerpt: description: str(required=False, upper_start=True, end_punc=True) + genai: enum('none', 'some', 'most', 'all', required=False) snippet_tags: list(str(), required=False) snippet_files: list(str(), required=False) diff --git a/aws_doc_sdk_examples_tools/config/sdks_schema.yaml b/aws_doc_sdk_examples_tools/config/sdks_schema.yaml index a841490..1a09528 100644 --- a/aws_doc_sdk_examples_tools/config/sdks_schema.yaml +++ b/aws_doc_sdk_examples_tools/config/sdks_schema.yaml @@ -1,6 +1,6 @@ # Yamale Schema for SDK metadata, which is the sdks.yaml file in the metadata folder. -map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift')) +map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift')) --- sdk: property: include('syntax_enum') @@ -30,6 +30,6 @@ title_override: title: str() title_abbrev: str() -syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift') +syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'powershell', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift') entity_regex: regex('^&[-_a-zA-Z0-9]+;$', name='valid entity') entity_with_version_regex: regex('^&[-_a-zA-Z0-9]+;', name='valid entity with version') diff --git a/aws_doc_sdk_examples_tools/config/services_schema.yaml b/aws_doc_sdk_examples_tools/config/services_schema.yaml index a998ec9..3b8fa50 100644 --- a/aws_doc_sdk_examples_tools/config/services_schema.yaml +++ b/aws_doc_sdk_examples_tools/config/services_schema.yaml @@ -3,8 +3,8 @@ map(include('service'), key=regex('^[-a-z0-9]+$', name='service slug')) --- service: - long: include('long_entity_regex') - short: include('entity_regex') + long: str() + short: str() sort: regex('^[^&]\\w', name='non-entity') chapter_override: include('chapter_override', required=False) expanded: @@ -16,7 +16,7 @@ service: url: include('doc_url') api_client: service_name(required=False) api_ref: include('doc_url') - version: service_version() + version: str() caveat: str(required=False, upper_start=True, end_punc=True) bundle: service_name(required=False) tags: map(key=enum('product_categories')) diff --git a/aws_doc_sdk_examples_tools/doc_gen.py b/aws_doc_sdk_examples_tools/doc_gen.py index bc6b6a2..0ce6a41 100644 --- a/aws_doc_sdk_examples_tools/doc_gen.py +++ b/aws_doc_sdk_examples_tools/doc_gen.py @@ -270,7 +270,7 @@ def validate(self): service.validate(self.errors) for example in self.examples.values(): example.validate(self.errors, self.root) - validate_metadata(self.root, self.errors) + validate_metadata(self.root, self.validation.strict_titles, self.errors) validate_no_duplicate_api_examples(self.examples.values(), self.errors) validate_snippets( [*self.examples.values()], diff --git a/aws_doc_sdk_examples_tools/metadata_validator.py b/aws_doc_sdk_examples_tools/metadata_validator.py index 9aa91a4..576f3de 100755 --- a/aws_doc_sdk_examples_tools/metadata_validator.py +++ b/aws_doc_sdk_examples_tools/metadata_validator.py @@ -12,6 +12,7 @@ import datetime import os import re +import xml.etree.ElementTree as xml_tree import yaml from dataclasses import dataclass, field from pathlib import Path @@ -122,8 +123,7 @@ def _is_valid(self, value: str): return True valid = True if self.check_aws: - # All occurrences of AWS must be entities or within a word. - valid = len(re.findall("(? bool: + """ + All occurrences of AWS must be entities or within a word or within a programlisting or code or noloc block. + + Count all bare AWS occurrences within accepted XML tags. + Count all bare AWS occurrences overall. + If these counts differ, there's an invalid usage. + """ + xval = value.replace("&", "&") + xtree = xml_tree.fromstring(f"{xval}") + blocks = ( + xtree.findall(".//programlisting") + + xtree.findall(".//code") + + xtree.findall(".//noloc") + ) + aws_in_blocks = 0 + for element in blocks: + aws_in_blocks += len( + re.findall("(? MetadataErrors: +def validate_metadata( + doc_gen_root: Path, strict: bool, errors: MetadataErrors +) -> MetadataErrors: config = Path(__file__).parent / "config" with open(config / "sdks.yaml") as sdks_file: sdks_yaml: Dict[str, Any] = yaml.safe_load(sdks_file) @@ -206,20 +233,28 @@ def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErr validators[BlockContent.tag] = BlockContent validators[String.tag] = StringExtension - schema_root = Path(__file__).parent / "config" + config_root = Path(__file__).parent / "config" + if strict: + example_schema = "example_strict_schema.yaml" + else: + example_schema = "example_schema.yaml" to_validate = [ # (schema, metadata_glob) - ("sdks_schema.yaml", "sdks.yaml"), - ("services_schema.yaml", "services.yaml"), - # TODO: Switch between strict schema for aws-doc-sdk-examples and loose schema for tributaries - ("example_strict_schema.yaml", "*_metadata.yaml"), + (config_root / "sdks_schema.yaml", config_root, "sdks.yaml"), + (config_root / "services_schema.yaml", config_root, "services.yaml"), + ( + config_root / example_schema, + doc_gen_root / ".doc_gen" / "metadata", + "*_metadata.yaml", + ), ] - for schema, metadata in to_validate: + for schema, meta_root, metadata in to_validate: validate_files( - schema_root / schema, - (doc_gen_root / "metadata").glob(metadata), + schema, + meta_root.glob(metadata), validators, + strict, errors, ) @@ -234,9 +269,12 @@ def main(): help="The folder that contains schema and metadata files.", required=False, ) + parser.add_argument( + "--strict", default=True, help="Use strict schema.", required=False + ) args = parser.parse_args() - errors = validate_metadata(Path(args.doc_gen), MetadataErrors()) + errors = validate_metadata(Path(args.doc_gen), args.strict, MetadataErrors()) if len(errors) == 0: print("Validation succeeded! 👍👍👍") diff --git a/aws_doc_sdk_examples_tools/metadata_validator_test.py b/aws_doc_sdk_examples_tools/metadata_validator_test.py new file mode 100644 index 0000000..45e447c --- /dev/null +++ b/aws_doc_sdk_examples_tools/metadata_validator_test.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +import pytest + +from .metadata_errors import MetadataErrors +from .metadata_validator import validate_metadata + + +@pytest.mark.parametrize("strict", [True, False]) +def test_aws_entity_usage(strict): + errors = MetadataErrors() + validate_metadata( + Path(Path(__file__).parent / "test_resources/doc_gen_test"), strict, errors + ) + + e_str = str(errors) + assert "Title has AWS" in e_str + assert "Title Abbrev has AWS" in e_str + assert "Synopsis has AWS" in e_str + assert "Synopsis list has AWS" in e_str + assert "Description has AWS" in e_str + + assert "Title has &AWS;" not in e_str + assert "Title Abbrev has &AWS;" not in e_str + assert "Synopsis programlisting has AWS" not in e_str + assert "Synopsis list code has AWS" not in e_str + assert "Description programlisting has AWS" not in e_str diff --git a/aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml b/aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml new file mode 100644 index 0000000..b499d21 --- /dev/null +++ b/aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml @@ -0,0 +1,38 @@ +sns_EntityFailures: + title: Title has AWS using an &AWS; SDK + title_abbrev: Title Abbrev has AWS in it + synopsis: "Synopsis has AWS in it." + synopsis_list: + - "Synopsis list has AWS in it." + category: Cat + languages: + Java: + versions: + - sdk_version: 1 + github: java/example_code/svc_EntityFailures + sdkguide: + excerpts: + - description: Description has AWS in it. + snippet_tags: + - java.example_code.svc_EntityFailures.Test + services: + sns: +sns_EntitySuccesses: + title: Title has &AWS; using an &AWS; SDK + title_abbrev: Title Abbrev has &AWS; in it + synopsis: "this Synopsis programlisting has AWS in it.." + synopsis_list: + - "Synopsis list code has AWS in it." + category: Cat + languages: + Java: + versions: + - sdk_version: 1 + github: java/example_code/svc_EntityFailures + sdkguide: + excerpts: + - description: This Description programlisting has AWS in it doesn't it. + snippet_tags: + - java.example_code.svc_EntityFailures.Test + services: + sns: