From ed5d1a0b038071b78f1ff6fb4cfe3b597d3eedd7 Mon Sep 17 00:00:00 2001 From: Sebastian Toepfer <61313468+sebastian-toepfer@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:03:53 +0100 Subject: [PATCH] add support for uri format --- .../vocabulary/formatassertion/Formats.java | 2 + .../vocabulary/formatassertion/rfc/Rfcs.java | 53 +++++++++++- .../src/main/resources/rfc/rfc3986 | 81 +++++++++++++++++++ .../formatassertion/FormatsTest.java | 6 ++ 4 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 vocabulary/format-assertion/src/main/resources/rfc/rfc3986 diff --git a/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/Formats.java b/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/Formats.java index 73c97e9..ff13d68 100644 --- a/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/Formats.java +++ b/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/Formats.java @@ -41,6 +41,8 @@ final class Formats { Map.of(3339, "full-time"), "duration", Map.of(3339, "duration"), + "uri", + Map.of(3986, "URI"), "email", Map.of(5321, "mailbox"), "ipv4", diff --git a/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/rfc/Rfcs.java b/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/rfc/Rfcs.java index c0266d5..71edfa6 100644 --- a/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/rfc/Rfcs.java +++ b/vocabulary/format-assertion/src/main/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/rfc/Rfcs.java @@ -27,7 +27,7 @@ import java.util.Optional; import java.util.regex.Pattern; -@SuppressWarnings({ "java:S5843", "java:S6035" }) +@SuppressWarnings({ "java:S5843", 
"java:S6035", "java:S5855" }) public final class Rfcs { private static final String DATE = "\\d{4}\\-\\d{2}\\-\\d{2}"; @@ -62,6 +62,57 @@ public final class Rfcs { ) ) ), + Map.entry( + 3986, + Map.of( + "URI", + new RegExRule( + Pattern.compile( + "^([A-Za-z]([A-Za-z0-9]|\\+|\\-|\\.)*)\\:(//(((([-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-" + + "9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=)|\\:)*)@)?((\\[((([0-9A-Fa-f]{1,4}" + + "\\:){6}(([0-9A-Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25" + + "[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]" + + "\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]))|\\:\\:([0-9A-Fa-f]{1,4}" + + "\\:){5}(([0-9A-Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25" + + "[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]" + + "\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]))|([0-9A-Fa-f]{1,4})?\\:" + + "\\:([0-9A-Fa-f]{1,4}\\:){4}(([0-9A-Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d|[1-9]\\d|" + + "1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-" + + "9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]))|(([" + + "0-9A-Fa-f]{1,4}\\:)?[0-9A-Fa-f]{1,4})?\\:\\:([0-9A-Fa-f]{1,4}\\:){3}(([0-9A-" + + "Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[" + + "1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\." + + "(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]))|(([0-9A-Fa-f]{1,4}\\:){0,2}[0-9A-Fa-f]" + + "{1,4})?\\:\\:([0-9A-Fa-f]{1,4}\\:){2}(([0-9A-Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d" + + "|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])" + + "\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25" + + "[0-5]))|(([0-9A-Fa-f]{1,4}\\:){0,3}[0-9A-Fa-f]{1,4})?\\:\\:[0-9A-Fa-f]{1,4}\\:((" + + "[0-9A-Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\." 
+ + "(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-" + + "5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]))|(([0-9A-Fa-f]{1,4}\\:){0,4}[0-9A" + + "-Fa-f]{1,4})?\\:\\:(([0-9A-Fa-f]{1,4}\\:[0-9A-Fa-f]{1,4})|(\\d|[1-9]\\d|1\\d{2}|" + + "2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1" + + "\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]))|(([0-9A-Fa" + + "-f]{1,4}\\:){0,5}[0-9A-Fa-f]{1,4})?\\:\\:[0-9A-Fa-f]{1,4}|(([0-9A-Fa-f]{1,4}\\:)" + + "{0,6}[0-9A-Fa-f]{1,4})?\\:\\:)|([Vv][0-9A-Fa-f]+\\.(([-A-Z._a-z0-9]|~)|(\\!|\\$|" + + "&|'|\\(|\\)|\\*|\\+|,|;|\\=)|\\:)+))\\])|(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]" + + ")\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|2" + + "5[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])|((([-A-Z._a-z0-9]|~)|%[0-9A-F" + + "a-f][0-9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=))*))(\\:\\d*)?(/(([-A-Z._a-" + + "z0-9]|~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=)|\\:|@)*)*" + + "|/((([-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|" + + "\\=)|\\:|@)+(/(([-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|" + + "\\*|\\+|,|;|\\=)|\\:|@)*)*)?|(([-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|" + + "\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=)|\\:|@)+(/(([-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-9A-" + + "Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=)|\\:|@)*)*|(([-A-Z._a-z0-9]|" + + "~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=)|\\:|@){0})(\\?((((" + + "[-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|\\$|&|'|\\(|\\)|\\*|\\+|,|;|\\=)|" + + "\\:|@)|/|\\?)*))?(#(((([-A-Z._a-z0-9]|~)|%[0-9A-Fa-f][0-9A-Fa-f]|(\\!|\\$|&|'|" + + "\\(|\\)|\\*|\\+|,|;|\\=)|\\:|@)|/|\\?)*))?$" + ) + ) + ) + ), Map.entry( 4291, Map.of( diff --git a/vocabulary/format-assertion/src/main/resources/rfc/rfc3986 b/vocabulary/format-assertion/src/main/resources/rfc/rfc3986 new file mode 100644 index 
0000000..b72e41e --- /dev/null +++ b/vocabulary/format-assertion/src/main/resources/rfc/rfc3986 @@ -0,0 +1,81 @@ +URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + +hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + +URI-reference = URI / relative-ref + +absolute-URI = scheme ":" hier-part [ "?" query ] + +relative-ref = relative-part [ "?" query ] [ "#" fragment ] + +relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + +scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + +authority = [ userinfo "@" ] host [ ":" port ] +userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +host = IP-literal / IPv4address / reg-name +port = *DIGIT + +IP-literal = "[" ( IPv6address / IPvFuture ) "]" + +IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + +IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + +h16 = 1*4HEXDIG +ls32 = ( h16 ":" h16 ) / IPv4address +IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet + +dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + +reg-name = *( unreserved / pct-encoded / sub-delims ) + +path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + +path-abempty = *( "/" segment ) +path-absolute = "/" [ segment-nz *( "/" segment ) ] +path-noscheme = segment-nz-nc *( "/" segment ) +path-rootless = segment-nz *( "/" segment ) +path-empty = 0<pchar> + +segment = *pchar +segment-nz = 1*pchar +segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + +pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + +query = *( pchar / "/" / "?" ) + +fragment = *( pchar / "/" / "?" ) + +pct-encoded = "%" HEXDIG HEXDIG + +unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +reserved = gen-delims / sub-delims +gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" \ No newline at end of file diff --git a/vocabulary/format-assertion/src/test/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/FormatsTest.java b/vocabulary/format-assertion/src/test/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/FormatsTest.java index 0f299e3..d7e9e32 100644 --- a/vocabulary/format-assertion/src/test/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/FormatsTest.java +++ b/vocabulary/format-assertion/src/test/java/io/github/sebastiantoepfer/jsonschema/vocabulary/formatassertion/FormatsTest.java @@ -96,4 +96,10 @@ void should_found_ipv6Format() { //ipv4 only -> invalid! 
assertThat(new Formats().findByName("ipv6").applyTo("125.158.4589.1"), is(false)); } + + @Test + void should_found_uriformat() { + assertThat(new Formats().findByName("uri").applyTo("http://www.example.com"), is(true)); + assertThat(new Formats().findByName("uri").applyTo("1://noUri"), is(false)); + } }