From 58ecee61f181dc2f0ffd9b8f5924cb61e11ae552 Mon Sep 17 00:00:00 2001
From: Laren-AWS <57545972+Laren-AWS@users.noreply.github.com>
Date: Wed, 4 Dec 2024 11:51:10 -0800
Subject: [PATCH] Validator: Re-enable yamale validator. (#124)
* Re-enable yamale validator.
* Update the relaxed schema and use it for tributaries; the strict_titles arg now selects the strict schema.
* Allow bare AWS within code-style XML tags.
* Write unit tests for yamale validation.
---
.../config/example_schema.yaml | 55 +++++---------
.../config/example_strict_schema.yaml | 7 +-
.../config/sdks_schema.yaml | 4 +-
.../config/services_schema.yaml | 6 +-
aws_doc_sdk_examples_tools/doc_gen.py | 2 +-
.../metadata_validator.py | 72 ++++++++++++++-----
.../metadata_validator_test.py | 31 ++++++++
.../metadata/aws_entity_metadata.yaml | 38 ++++++++++
8 files changed, 153 insertions(+), 62 deletions(-)
create mode 100644 aws_doc_sdk_examples_tools/metadata_validator_test.py
create mode 100644 aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml
diff --git a/aws_doc_sdk_examples_tools/config/example_schema.yaml b/aws_doc_sdk_examples_tools/config/example_schema.yaml
index df55c42..e9e9099 100644
--- a/aws_doc_sdk_examples_tools/config/example_schema.yaml
+++ b/aws_doc_sdk_examples_tools/config/example_schema.yaml
@@ -1,60 +1,43 @@
# Yamale Schema for example metadata, which is all .yaml files in the metadata folder
# with a _metadata.yaml suffix.
-map(include('example'), key=example_id())
+map(include('example'), key=str())
---
-# An example blocks all the languages together for a single example in a tab list. It is a navigable page on the code examples library. It is the top level "unit" of SoS content. This metadata is merged from tributaries with aws-doc-sdk-examples.
example:
- # Human readable title. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging.
- title: str(upper_start=True, no_end_punc=True, required=False)
- # Used in the TOC. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging.
- title_abbrev: str(upper_start=True, no_end_punc=True, required=False)
- # String label categories. Categories inferred by cross-service with multiple services, and can be whatever else it wants. Controls where in the TOC it appears. Overwritten by aws-doc-sdk-example when merging.
- category: str(upper_start=True, no_end_punc=True, required=False)
- # Link to additional topic places. Overwritten by aws-doc-sdk-example when merging.
- guide_topic: include('guide_topic', required=False) # TODO Make this a list or a single.
- # TODO how to add a language here and require it in sdks_schema. TODO: Keys merged by aws-doc-sdk-example when merging.
- languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
- # TODO document service_main and services. Not to be used by tributaries. Part of Cross Service.
- # List of services used by the examples. Lines up with those in services.yaml. Overwritten by aws-doc-sdk-example when merging.
+ title: str(required=False, upper_start=True, no_end_punc=True)
+ title_abbrev: str(required=False, upper_start=True, no_end_punc=True)
+ synopsis: str(required=False)
+ synopsis_list: list(str(upper_start=True), required=False)
+ category: str(required=False, upper_start=True, no_end_punc=True)
+ guide_topic: include('guide_topic', required=False)
+ languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
service_main: service_name(required=False)
- services: map(map(key=str(), required=False), key=service_name())
- synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True, required=False)
- synopsis_list: list(str(upper_start=True, end_punc=True), required=False)
+ services: map(key=service_name())
-# Used for creating links in the block.
guide_topic:
title: str(upper_start=True, no_end_punc=True)
url: include('doc_url', required=False)
-# Language Version configuration. Likely just the single list item.
language:
versions: list(include('version'))
-# Example for a single Language.
+# Per-language excerpts for the example. Languages and SDK versions are defined in .doc_gen/metadata/sdk_metadata.yaml
version:
- sdk_version: int(min=1)
- # Additional ZonBook XML to include in the tab for this sample.
- block_content: block_content(required=False)
- # The specific code samples to include in the example.
- excerpts: list(include('excerpt'), required=False)
- # Link to the source code for this example. TODO rename.
+ sdk_version: any(int(min=1), str(check_aws=False))
github: str(required=False)
+ github_name: str(required=False)
github_note_at_bottom: bool(required=False)
- add_services: map(key=service_name(), required=False)
- # Deprecated. Replace with guide_topic list.
sdkguide: include('doc_url', required=False)
- # Link to additional topic places. TODO: Overwritten by aws-doc-sdk-example when merging.
- more_info: list(include('guide_topic', required=False))
+ excerpts: list(include('excerpt'), required=False)
+ block_content: block_content(required=False)
+ add_services: map(key=service_name(), required=False)
-# One language example can have several excerpts, each having a description block and one or more snippets.
-# An excerpt may have either snippet_files OR snippet_tags, but not both.
+# The references to code content that will be included in the example's content.
excerpt:
- description: str(required=False, upper_start=True, end_punc=True)
- # A path within the repo to extract the entire file as a snippet.
- snippet_files: list(str(), required=False)
- # Tags embedded in source files to extract as snippets.
+ description: str(required=False)
+ genai: enum('none', 'some', 'most', 'all', required=False)
snippet_tags: list(str(), required=False)
+ snippet_files: list(str(), required=False)
service_slug_regex: regex('^[-a-z0-9]+$', name='service slug')
doc_url: regex('^(?!https://docs.aws.amazon.com/).+', name="relative documentation URL")
diff --git a/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml b/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml
index 5acb179..629a058 100644
--- a/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml
+++ b/aws_doc_sdk_examples_tools/config/example_strict_schema.yaml
@@ -4,13 +4,13 @@
map(include('example'), key=example_id())
---
example:
- title: str(upper_start=True, no_end_punc=True)
- title_abbrev: str(upper_start=True, no_end_punc=True)
+ title: str(required=False, upper_start=True, no_end_punc=True)
+ title_abbrev: str(required=False, upper_start=True, no_end_punc=True)
synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True)
synopsis_list: list(str(upper_start=True, end_punc=True), required=False)
category: str(required=False, upper_start=True, no_end_punc=True)
guide_topic: include('guide_topic', required=False)
- languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
+ languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
service_main: service_name(required=False)
services: map(map(key=str(), required=False), key=service_name())
@@ -34,6 +34,7 @@ version:
# The references to code content that will be included in the example's content.
excerpt:
description: str(required=False, upper_start=True, end_punc=True)
+ genai: enum('none', 'some', 'most', 'all', required=False)
snippet_tags: list(str(), required=False)
snippet_files: list(str(), required=False)
diff --git a/aws_doc_sdk_examples_tools/config/sdks_schema.yaml b/aws_doc_sdk_examples_tools/config/sdks_schema.yaml
index a841490..1a09528 100644
--- a/aws_doc_sdk_examples_tools/config/sdks_schema.yaml
+++ b/aws_doc_sdk_examples_tools/config/sdks_schema.yaml
@@ -1,6 +1,6 @@
# Yamale Schema for SDK metadata, which is the sdks.yaml file in the metadata folder.
-map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
+map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
---
sdk:
property: include('syntax_enum')
@@ -30,6 +30,6 @@ title_override:
title: str()
title_abbrev: str()
-syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift')
+syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'powershell', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift')
entity_regex: regex('^&[-_a-zA-Z0-9]+;$', name='valid entity')
entity_with_version_regex: regex('^&[-_a-zA-Z0-9]+;', name='valid entity with version')
diff --git a/aws_doc_sdk_examples_tools/config/services_schema.yaml b/aws_doc_sdk_examples_tools/config/services_schema.yaml
index a998ec9..3b8fa50 100644
--- a/aws_doc_sdk_examples_tools/config/services_schema.yaml
+++ b/aws_doc_sdk_examples_tools/config/services_schema.yaml
@@ -3,8 +3,8 @@
map(include('service'), key=regex('^[-a-z0-9]+$', name='service slug'))
---
service:
- long: include('long_entity_regex')
- short: include('entity_regex')
+ long: str()
+ short: str()
sort: regex('^[^&]\\w', name='non-entity')
chapter_override: include('chapter_override', required=False)
expanded:
@@ -16,7 +16,7 @@ service:
url: include('doc_url')
api_client: service_name(required=False)
api_ref: include('doc_url')
- version: service_version()
+ version: str()
caveat: str(required=False, upper_start=True, end_punc=True)
bundle: service_name(required=False)
tags: map(key=enum('product_categories'))
diff --git a/aws_doc_sdk_examples_tools/doc_gen.py b/aws_doc_sdk_examples_tools/doc_gen.py
index bc6b6a2..0ce6a41 100644
--- a/aws_doc_sdk_examples_tools/doc_gen.py
+++ b/aws_doc_sdk_examples_tools/doc_gen.py
@@ -270,7 +270,7 @@ def validate(self):
service.validate(self.errors)
for example in self.examples.values():
example.validate(self.errors, self.root)
- validate_metadata(self.root, self.errors)
+ validate_metadata(self.root, self.validation.strict_titles, self.errors)
validate_no_duplicate_api_examples(self.examples.values(), self.errors)
validate_snippets(
[*self.examples.values()],
diff --git a/aws_doc_sdk_examples_tools/metadata_validator.py b/aws_doc_sdk_examples_tools/metadata_validator.py
index 9aa91a4..576f3de 100755
--- a/aws_doc_sdk_examples_tools/metadata_validator.py
+++ b/aws_doc_sdk_examples_tools/metadata_validator.py
@@ -12,6 +12,7 @@
import datetime
import os
import re
+import xml.etree.ElementTree as xml_tree
import yaml
from dataclasses import dataclass, field
from pathlib import Path
@@ -122,8 +123,7 @@ def _is_valid(self, value: str):
return True
valid = True
if self.check_aws:
- # All occurrences of AWS must be entities or within a word.
- valid = len(re.findall("(? bool:
+ """
+ All occurrences of AWS must be entities, within a word, or within a programlisting, code, or noloc block.
+
+ Count all bare AWS occurrences within accepted XML tags.
+ Count all bare AWS occurrences overall.
+ If these counts differ, there's an invalid usage.
+ """
+ xval = value.replace("&", "&amp;")
+ xtree = xml_tree.fromstring(f"<fake>{xval}</fake>")
+ blocks = (
+ xtree.findall(".//programlisting")
+ + xtree.findall(".//code")
+ + xtree.findall(".//noloc")
+ )
+ aws_in_blocks = 0
+ for element in blocks:
+ aws_in_blocks += len(
+ re.findall("(? MetadataErrors:
+def validate_metadata(
+ doc_gen_root: Path, strict: bool, errors: MetadataErrors
+) -> MetadataErrors:
config = Path(__file__).parent / "config"
with open(config / "sdks.yaml") as sdks_file:
sdks_yaml: Dict[str, Any] = yaml.safe_load(sdks_file)
@@ -206,20 +233,28 @@ def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErr
validators[BlockContent.tag] = BlockContent
validators[String.tag] = StringExtension
- schema_root = Path(__file__).parent / "config"
+ config_root = Path(__file__).parent / "config"
+ if strict:
+ example_schema = "example_strict_schema.yaml"
+ else:
+ example_schema = "example_schema.yaml"
to_validate = [
# (schema, metadata_glob)
- ("sdks_schema.yaml", "sdks.yaml"),
- ("services_schema.yaml", "services.yaml"),
- # TODO: Switch between strict schema for aws-doc-sdk-examples and loose schema for tributaries
- ("example_strict_schema.yaml", "*_metadata.yaml"),
+ (config_root / "sdks_schema.yaml", config_root, "sdks.yaml"),
+ (config_root / "services_schema.yaml", config_root, "services.yaml"),
+ (
+ config_root / example_schema,
+ doc_gen_root / ".doc_gen" / "metadata",
+ "*_metadata.yaml",
+ ),
]
- for schema, metadata in to_validate:
+ for schema, meta_root, metadata in to_validate:
validate_files(
- schema_root / schema,
- (doc_gen_root / "metadata").glob(metadata),
+ schema,
+ meta_root.glob(metadata),
validators,
+ strict,
errors,
)
@@ -234,9 +269,12 @@ def main():
help="The folder that contains schema and metadata files.",
required=False,
)
+ parser.add_argument(
+ "--strict", default=True, help="Use strict schema.", required=False
+ )
args = parser.parse_args()
- errors = validate_metadata(Path(args.doc_gen), MetadataErrors())
+ errors = validate_metadata(Path(args.doc_gen), args.strict, MetadataErrors())
if len(errors) == 0:
print("Validation succeeded! 👍👍👍")
diff --git a/aws_doc_sdk_examples_tools/metadata_validator_test.py b/aws_doc_sdk_examples_tools/metadata_validator_test.py
new file mode 100644
index 0000000..45e447c
--- /dev/null
+++ b/aws_doc_sdk_examples_tools/metadata_validator_test.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from pathlib import Path
+
+import pytest
+
+from .metadata_errors import MetadataErrors
+from .metadata_validator import validate_metadata
+
+
+@pytest.mark.parametrize("strict", [True, False])
+def test_aws_entity_usage(strict):
+ errors = MetadataErrors()
+ validate_metadata(
+ Path(Path(__file__).parent / "test_resources/doc_gen_test"), strict, errors
+ )
+
+ e_str = str(errors)
+ assert "Title has AWS" in e_str
+ assert "Title Abbrev has AWS" in e_str
+ assert "Synopsis has AWS" in e_str
+ assert "Synopsis list has AWS" in e_str
+ assert "Description has AWS" in e_str
+
+ assert "Title has &AWS;" not in e_str
+ assert "Title Abbrev has &AWS;" not in e_str
+ assert "Synopsis programlisting has AWS" not in e_str
+ assert "Synopsis list code has AWS" not in e_str
+ assert "Description programlisting has AWS" not in e_str
diff --git a/aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml b/aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml
new file mode 100644
index 0000000..b499d21
--- /dev/null
+++ b/aws_doc_sdk_examples_tools/test_resources/doc_gen_test/.doc_gen/metadata/aws_entity_metadata.yaml
@@ -0,0 +1,38 @@
+sns_EntityFailures:
+ title: Title has AWS using an &AWS; SDK
+ title_abbrev: Title Abbrev has AWS in it
+ synopsis: "Synopsis has AWS in it."
+ synopsis_list:
+ - "Synopsis list has AWS in it."
+ category: Cat
+ languages:
+ Java:
+ versions:
+ - sdk_version: 1
+ github: java/example_code/svc_EntityFailures
+ sdkguide:
+ excerpts:
+ - description: Description has AWS in it.
+ snippet_tags:
+ - java.example_code.svc_EntityFailures.Test
+ services:
+ sns:
+sns_EntitySuccesses:
+ title: Title has &AWS; using an &AWS; SDK
+ title_abbrev: Title Abbrev has &AWS; in it
+ synopsis: "this Synopsis programlisting has AWS in it.."
+ synopsis_list:
+ - "Synopsis list code has AWS
in it."
+ category: Cat
+ languages:
+ Java:
+ versions:
+ - sdk_version: 1
+ github: java/example_code/svc_EntityFailures
+ sdkguide:
+ excerpts:
+ - description: This Description programlisting has AWS in it doesn't it.
+ snippet_tags:
+ - java.example_code.svc_EntityFailures.Test
+ services:
+ sns: