Materials-Consortia · sauliusg · Jul 15, 2019 · Jul 15, 2019 · Jul 15, 2019 · Jul 20, 2019
diff --git a/optimade.md b/optimade.md
@@ -1287,8 +1287,22 @@ In addition to the standard equality and inequality operators, matching of parti
 
 OPTIONAL features: 
 
+The following comparison operators are OPTIONAL:
+
+* `identifier LIKE x`
-* `identfier LIKE x`
+* `identifier LIKE x`
+
-* `identfier LIKE x`
+* `identifier LIKE x`
+
+
+* `identifier UNLIKE x`
-* `identfier LIKE x`
-
-* `identfier UNLIKE x`
+* `identifier LIKE pattern`: Is true if the property matches the provided `pattern`.
+
+* `identifier UNLIKE pattern`: Is true if the property does not match the provided `pattern`.
+
-* `identfier UNLIKE x`
+* `identifier UNLIKE x`
+
-* `identfier LIKE x`
-
-* `identfier UNLIKE x`
+* `identifier LIKE pattern`: Is true if the property matches the provided `pattern`.
+
+* `identifier UNLIKE pattern`: Is true if the property does not match the provided `pattern`.
+
-* `identfier UNLIKE x`
+* `identifier UNLIKE x`
+
+
 * Support for x to be an identifier, rather than a string is OPTIONAL.
 
+If implemented, the "LIKE" operator MUST behave as the corresponding standard SQL operator. In particular,
+the `x` string MUST be interpreted as a pattern where an underscore character ('_', ASCII DEC 95, HEX 5F)
+matches any single character and a percent character ('%', ASCII DEC 37, HEX 25) matches an arbitrary
+sequence of characters (including zero characters).
-If implemented, the "LIKE" operator MUST behave as the correspoding standard SQL operator. In particular,
-The `x` string MUST be interpreted as a pattern where an underscore character ('_', ASCII DEC 95, HEX 5F)
-matches any single character and a percent character ('%', ASCII DEC 37, HEX 25) matches an arbitrary
-sequence of characters (including zero characters).
+If implemented, the `LIKE` operator MUST behave as the corresponding standard SQL operator.
+The `x` string MUST be interpreted as a string-matching pattern where an underscore character ('_', ASCII DEC 95, HEX 5F) matches any single character and a percent character ('%', ASCII DEC 37, HEX 25) matches an arbitrary sequence of characters (including zero characters).
+
-If implemented, the "LIKE" operator MUST behave as the correspoding standard SQL operator. In particular,
-The `x` string MUST be interpreted as a pattern where an underscore character ('_', ASCII DEC 95, HEX 5F)
-matches any single character and a percent character ('%', ASCII DEC 37, HEX 25) matches an arbitrary
-sequence of characters (including zero characters).
+If implemented, the `LIKE` operator MUST behave as the corresponding standard SQL operator.
+The `x` string MUST be interpreted as a string-matching pattern where an underscore character ('_', ASCII DEC 95, HEX 5F) matches any single character and a percent character ('%', ASCII DEC 37, HEX 25) matches an arbitrary sequence of characters (including zero characters).
+
+
+If operator "UNLIKE" is supported, the behavior of this operator MUST be the negation of the "LIKE" operator; i.e.,
+an expression `(property UNLIKE "value")` must behave exactly as `(NOT (property LIKE "value"))`.
-an expression `(property UNLIKE "value")" must behave exactly as `(NOT (property LIKE "value"))`.
+expression `(property UNLIKE "value")` must behave exactly as `(NOT (property LIKE "value"))`.
+
-an expression `(property UNLIKE "value")" must behave exactly as `(NOT (property LIKE "value"))`.
+expression `(property UNLIKE "value")` must behave exactly as `(NOT (property LIKE "value"))`.
+
+
 Examples:
 
 * `chemical_formula_anonymous CONTAINS "C2" AND chemical_formula_anonymous STARTS WITH "A2"` 
@@ -2188,7 +2202,13 @@ ValueOpRhs = Operator, Value ;
 
 KnownOpRhs = IS, ( KNOWN | UNKNOWN ) ; 
 
-FuzzyStringOpRhs = CONTAINS, String | STARTS, [ WITH ], String | ENDS, [ WITH ], String ;
+StringProperty = String | Property ;
+
+FuzzyStringOpRhs = CONTAINS, StringProperty |
+                   STARTS, [ WITH ], StringProperty |
+                   ENDS, [ WITH ], StringProperty |
+                   MATCH, ( RegularExpression | StringProperty ) |
+                   NOT, MATCH, ( RegularExpression | StringProperty ) ;
 
 SetOpRhs = HAS, ( [ Operator ], Value | ALL, ValueList | ANY, ValueList | ONLY, ValueList ) ;
 (* Note: support for ONLY in SetOpRhs is OPTIONAL *)
@@ -2236,6 +2256,8 @@ ALL = 'A', 'L', 'L', [Spaces] ;
 ONLY = 'O', 'N', 'L', 'Y', [Spaces] ;
 ANY = 'A', 'N', 'Y', [Spaces] ;
 
+MATCH = 'M', 'A', 'T', 'C', 'H', [Spaces];
+
 (* OperatorComparison operator tokens: *)
 
 Operator = ( '<', [ '=' ] | '>', [ '=' ] | '=' | '!', '=' ), [Spaces] ;
@@ -2262,16 +2284,34 @@ LowercaseLetter =
 
 String = '"', { EscapedChar }, '"', [Spaces] ;
 
-EscapedChar = UnescapedChar | '\', '"' | '\', '\' ;
+UnescapedChar = Letter | Digit | Space | '/' |
+                Punctuator | RegexpMetacharacter |
+                UnicodeHighChar ;
 
-UnescapedChar = Letter | Digit | Space | Punctuator | UnicodeHighChar ;
+EscapedChar = UnescapedChar | '\', '"' | '\', '\' ;
 
 Punctuator =
-    '!' | '#' | '$' | '%' | '&' | "'" | '(' | ')' | '*' | '+' | ',' |
-    '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' |
-    ']' | '^' | '`' | '{' | '|' | '}' | '~'
+    '!' | '#' | '%' | '&' | "'" | ',' |
+    '-' | ':' | ';' | '<' | '=' | '>' | '@' |
+    '`' | '~'
+;
+
+RegexpMetacharacter =
+    '(' | ')' | '[' | ']' | '+' | '*' | '?' | '.' | '{' | '}' |
+    '|' | '^' | '$'
 ;
 
+(* Regular expressions: *)
+
+UnescapedREChar = Letter | Digit | Space | '"' |
+                  Punctuator | RegexpMetacharacter |
+                  UnicodeHighChar ;
+
+EscapedREChar = UnescapedREChar | '\', '/' | '\', '\' |
+                '\', RegexpMetacharacter ;
+
+RegularExpression = '/', { EscapedREChar }, '/', [Spaces] ;
+
 (* BEGIN EBNF GRAMMAR Number *)
 (* Number token syntax: *)
 

diff --git a/tests/cases/Filter_072.inp b/tests/cases/Filter_072.inp
@@ -0,0 +1 @@
+chemical_formula LIKE "H2 O2"
diff --git a/tests/cases/Filter_072.opt b/tests/cases/Filter_072.opt
@@ -0,0 +1 @@
+Filter_001.opt
diff --git a/tests/cases/Filter_073.inp b/tests/cases/Filter_073.inp
@@ -0,0 +1 @@
+chemical_formula NOT LIKE "C6 H12 O6"
diff --git a/tests/cases/Filter_073.opt b/tests/cases/Filter_073.opt
@@ -0,0 +1 @@
+Filter_001.opt
diff --git a/tests/cases/Filter_074.inp b/tests/cases/Filter_074.inp
@@ -0,0 +1 @@
+chemical_formula UNLIKE "C6 H12 O6"
diff --git a/tests/cases/Filter_074.opt b/tests/cases/Filter_074.opt
@@ -0,0 +1 @@
+Filter_001.opt
diff --git a/tests/cases/Filter_075.inp b/tests/cases/Filter_075.inp
@@ -0,0 +1 @@
+property MATCH /"^.?abc+[a-z0-9]*$"/
diff --git a/tests/cases/Filter_075.opt b/tests/cases/Filter_075.opt
@@ -0,0 +1 @@
+Filter_001.opt
diff --git a/tests/cases/Filter_076.inp b/tests/cases/Filter_076.inp
@@ -0,0 +1 @@
+property MATCH "\"^.?abc+[a-z0-9]\\*$\""
diff --git a/tests/cases/Filter_076.opt b/tests/cases/Filter_076.opt
@@ -0,0 +1 @@
+Filter_001.opt
diff --git a/tests/outputs/Filter_019.out b/tests/outputs/Filter_019.out
@@ -56,7 +56,9 @@ Filter(9999)
                   Space(9999)
                     TOKEN_1(9999): " ", line: 1, col: 26
 Error: in tests/cases/Filter_019.inp: line 1:
-    unexpected token "4", expected """
+    unexpected token "4", expected one of """, "a", "b", "c", "d",
+    "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",
+    "r", "s", "t", "u", "v", "w", "x", "y", "z", or "_"
 
 chemical_formula CONTAINS 42
                           ^
diff --git a/tests/outputs/Filter_020.out b/tests/outputs/Filter_020.out
@@ -56,7 +56,9 @@ Filter(9999)
                   Space(9999)
                     TOKEN_1(9999): " ", line: 1, col: 26
 Error: in tests/cases/Filter_020.inp: line 1:
-    unexpected token "S", expected """
+    unexpected token "S", expected one of """, "a", "b", "c", "d",
+    "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",
+    "r", "s", "t", "u", "v", "w", "x", "y", "z", or "_"
 
 chemical_formula CONTAINS STARTS "Al"
                           ^
diff --git a/tests/outputs/Filter_021.out b/tests/outputs/Filter_021.out
@@ -55,22 +55,23 @@ Filter(9999)
                 Spaces(9999)
                   Space(9999)
                     TOKEN_1(9999): " ", line: 1, col: 26
-              String(9999)
-                TOKEN_3(9999): """, line: 1, col: 27
-                EscapedChar(9999)
-                  UnescapedChar(9999)
-                    Letter(9999)
-                      UppercaseLetter(9999)
-                        TOKEN_33(9999): "A", line: 1, col: 28
-                EscapedChar(9999)
-                  UnescapedChar(9999)
-                    Letter(9999)
-                      LowercaseLetter(9999)
-                        TOKEN_76(9999): "l", line: 1, col: 29
-                TOKEN_3(9999): """, line: 1, col: 30
-                Spaces(9999)
-                  Space(9999)
-                    TOKEN_1(9999): " ", line: 1, col: 31
+              StringProperty(9999)
+                String(9999)
+                  TOKEN_3(9999): """, line: 1, col: 27
+                  EscapedChar(9999)
+                    UnescapedChar(9999)
+                      Letter(9999)
+                        UppercaseLetter(9999)
+                          TOKEN_33(9999): "A", line: 1, col: 28
+                  EscapedChar(9999)
+                    UnescapedChar(9999)
+                      Letter(9999)
+                        LowercaseLetter(9999)
+                          TOKEN_76(9999): "l", line: 1, col: 29
+                  TOKEN_3(9999): """, line: 1, col: 30
+                  Spaces(9999)
+                    Space(9999)
+                      TOKEN_1(9999): " ", line: 1, col: 31
       AND(9999)
         TOKEN_33(9999): "A", line: 1, col: 32
         TOKEN_46(9999): "N", line: 1, col: 33
@@ -130,22 +131,23 @@ Filter(9999)
                   Spaces(9999)
                     Space(9999)
                       TOKEN_1(9999): " ", line: 1, col: 59
-                String(9999)
-                  TOKEN_3(9999): """, line: 1, col: 60
-                  EscapedChar(9999)
-                    UnescapedChar(9999)
-                      Letter(9999)
-                        UppercaseLetter(9999)
-                          TOKEN_33(9999): "A", line: 1, col: 61
-                  EscapedChar(9999)
-                    UnescapedChar(9999)
-                      Letter(9999)
-                        LowercaseLetter(9999)
-                          TOKEN_76(9999): "l", line: 1, col: 62
-                  TOKEN_3(9999): """, line: 1, col: 63
-                  Spaces(9999)
-                    Space(9999)
-                      TOKEN_1(9999): " ", line: 1, col: 64
+                StringProperty(9999)
+                  String(9999)
+                    TOKEN_3(9999): """, line: 1, col: 60
+                    EscapedChar(9999)
+                      UnescapedChar(9999)
+                        Letter(9999)
+                          UppercaseLetter(9999)
+                            TOKEN_33(9999): "A", line: 1, col: 61
+                    EscapedChar(9999)
+                      UnescapedChar(9999)
+                        Letter(9999)
+                          LowercaseLetter(9999)
+                            TOKEN_76(9999): "l", line: 1, col: 62
+                    TOKEN_3(9999): """, line: 1, col: 63
+                    Spaces(9999)
+                      Space(9999)
+                        TOKEN_1(9999): " ", line: 1, col: 64
         AND(9999)
           TOKEN_33(9999): "A", line: 1, col: 65
           TOKEN_46(9999): "N", line: 1, col: 66
@@ -203,20 +205,21 @@ Filter(9999)
                     Spaces(9999)
                       Space(9999)
                         TOKEN_1(9999): " ", line: 1, col: 90
-                  String(9999)
-                    TOKEN_3(9999): """, line: 1, col: 91
-                    EscapedChar(9999)
-                      UnescapedChar(9999)
-                        Letter(9999)
-                          UppercaseLetter(9999)
-                            TOKEN_33(9999): "A", line: 1, col: 92
-                    EscapedChar(9999)
-                      UnescapedChar(9999)
-                        Letter(9999)
-                          LowercaseLetter(9999)
-                            TOKEN_76(9999): "l", line: 1, col: 93
-                    TOKEN_3(9999): """, line: 1, col: 94
-                    Spaces(9999)
-                      Space(9999)
-                        nl(9999)
-                          SPECIAL_1(9999): "(...)", line: 1, col: 95
+                  StringProperty(9999)
+                    String(9999)
+                      TOKEN_3(9999): """, line: 1, col: 91
+                      EscapedChar(9999)
+                        UnescapedChar(9999)
+                          Letter(9999)
+                            UppercaseLetter(9999)
+                              TOKEN_33(9999): "A", line: 1, col: 92
+                      EscapedChar(9999)
+                        UnescapedChar(9999)
+                          Letter(9999)
+                            LowercaseLetter(9999)
+                              TOKEN_76(9999): "l", line: 1, col: 93
+                      TOKEN_3(9999): """, line: 1, col: 94
+                      Spaces(9999)
+                        Space(9999)
+                          nl(9999)
+                            SPECIAL_1(9999): "(...)", line: 1, col: 95
diff --git a/tests/outputs/Filter_022.out b/tests/outputs/Filter_022.out
@@ -46,7 +46,7 @@ Filter(9999)
                     TOKEN_1(9999): " ", line: 1, col: 18
 Error: in tests/cases/Filter_022.inp: line 1:
     unexpected token "U", expected one of "<", ">", "=", "!", "I",
-    "C", "S", "E", "H", or ":"
+    "C", "S", "E", "M", "N", "H", or ":"
 
 prototype_formula UNKNOWN
                   ^
diff --git a/tests/outputs/Filter_023.out b/tests/outputs/Filter_023.out
@@ -28,7 +28,7 @@ Filter(9999)
                     TOKEN_1(9999): " ", line: 1, col: 9
 Error: in tests/cases/Filter_023.inp: line 1:
     unexpected token "f", expected one of "<", ">", "=", "!", "I",
-    "C", "S", "E", "H", or ":"
+    "C", "S", "E", "M", "N", "H", or ":"
 
 chemical formula IS KNOWN 42
          ^

diff --git a/tests/outputs/Filter_024.out b/tests/outputs/Filter_024.out
@@ -44,7 +44,7 @@ Filter(9999)
                     TOKEN_1(9999): " ", line: 1, col: 17
 Error: in tests/cases/Filter_024.inp: line 1:
     unexpected token "K", expected one of "<", ">", "=", "!", "I",
-    "C", "S", "E", "H", or ":"
+    "C", "S", "E", "M", "N", "H", or ":"
 
 chemical_formula KNOWN
                  ^
diff --git a/tests/outputs/Filter_028.out b/tests/outputs/Filter_028.out
@@ -28,7 +28,7 @@ Filter(9999)
                     TOKEN_1(9999): " ", line: 1, col: 9
 Error: in tests/cases/Filter_028.inp: line 1:
     unexpected token "L", expected one of "<", ">", "=", "!", "I",
-    "C", "S", "E", "H", or ":"
+    "C", "S", "E", "M", "N", "H", or ":"
 
 elements LENGTH 42
          ^
diff --git a/tests/outputs/Filter_059.out b/tests/outputs/Filter_059.out
@@ -57,21 +57,22 @@ Filter(9999)
                 Spaces(9999)
                   Space(9999)
                     TOKEN_1(9999): " ", line: 1, col: 27
-              String(9999)
-                TOKEN_3(9999): """, line: 1, col: 28
-                EscapedChar(9999)
-                  UnescapedChar(9999)
-                    Letter(9999)
-                      UppercaseLetter(9999)
-                        TOKEN_35(9999): "C", line: 1, col: 29
-                EscapedChar(9999)
-                  UnescapedChar(9999)
-                    Digit(9999)
-                      TOKEN_18(9999): "2", line: 1, col: 30
-                TOKEN_3(9999): """, line: 1, col: 31
-                Spaces(9999)
-                  Space(9999)
-                    TOKEN_1(9999): " ", line: 1, col: 32
+              StringProperty(9999)
+                String(9999)
+                  TOKEN_3(9999): """, line: 1, col: 28
+                  EscapedChar(9999)
+                    UnescapedChar(9999)
+                      Letter(9999)
+                        UppercaseLetter(9999)
+                          TOKEN_35(9999): "C", line: 1, col: 29
+                  EscapedChar(9999)
+                    UnescapedChar(9999)
+                      Digit(9999)
+                        TOKEN_18(9999): "2", line: 1, col: 30
+                  TOKEN_3(9999): """, line: 1, col: 31
+                  Spaces(9999)
+                    Space(9999)
+                      TOKEN_1(9999): " ", line: 1, col: 32
       AND(9999)
         TOKEN_33(9999): "A", line: 1, col: 33
         TOKEN_46(9999): "N", line: 1, col: 34
@@ -141,15 +142,16 @@ Filter(9999)
                   Spaces(9999)
                     Space(9999)
                       TOKEN_1(9999): " ", line: 1, col: 66
-                String(9999)
-                  TOKEN_3(9999): """, line: 1, col: 67
-                  EscapedChar(9999)
-                    UnescapedChar(9999)
-                      Letter(9999)
-                        UppercaseLetter(9999)
-                          TOKEN_33(9999): "A", line: 1, col: 68
-                  EscapedChar(9999)
-                    UnescapedChar(9999)
-                      Digit(9999)
-                        TOKEN_18(9999): "2", line: 1, col: 69
-                  TOKEN_3(9999): """, line: 1, col: 70
+                StringProperty(9999)
+                  String(9999)
+                    TOKEN_3(9999): """, line: 1, col: 67
+                    EscapedChar(9999)
+                      UnescapedChar(9999)
+                        Letter(9999)
+                          UppercaseLetter(9999)
+                            TOKEN_33(9999): "A", line: 1, col: 68
+                    EscapedChar(9999)
+                      UnescapedChar(9999)
+                        Digit(9999)
+                          TOKEN_18(9999): "2", line: 1, col: 69
+                    TOKEN_3(9999): """, line: 1, col: 70