Export of internal ZetaSQL changes.

-- Change by ZetaSQL Team <[email protected]>: Add "alwayslink=1" to the target compliance:sql_test_base. -- Change by ZetaSQL Team <[email protected]>: Docs for JSON_KEYS function. -- Change by ZetaSQL Team <[email protected]>: Clarify that INT64 is converted to a string in JavaScript -- Change by ZetaSQL Team <[email protected]>: No public description -- Change by ZetaSQL Team <[email protected]>: No public description -- Change by ZetaSQL Team <[email protected]>: CL to regenerate artifacts. -- Change by ZetaSQL Team <[email protected]>: Add MAP_VALUES_SORTED_BY_KEY function -- Change by ZetaSQL Team <[email protected]>: No public description -- Change by ZetaSQL Team <[email protected]>: No public description -- Change by ZetaSQL Team <[email protected]>: No public description -- Change by ZetaSQL Team <[email protected]>: Add map_keys_sorted, map_keys_unsorted, map_values_sorted, and map_values_unsorted functions. -- Change by ZetaSQL Team <[email protected]>: Update ZetaSQL docs GitOrigin-RevId: d6002ecc05e21d26aef5686de0d2188ff622e244 Change-Id: Ifd9836e733968d7cf6ac4f17d94d3748cd6e95fe
google · Jun 26, 2024 · f6df697 · f6df697
1 parent 46aa917
commit f6df697
Show file tree

Hide file tree

Showing 38 changed files with 1,347 additions and 153 deletions.
diff --git a/docs/functions-and-operators.md b/docs/functions-and-operators.md
@@ -18862,6 +18862,7 @@ behavior:
       <td><a id="accessors"></a>Accessors</td>
       <td>
 
+
         <a href="#json_type"><code>JSON_TYPE</code></a><br>
 
       </td>
@@ -25324,7 +25325,7 @@ PARSE_JSON(json_string_expr[, wide_number_mode=>{ 'exact' | 'round' }])
 
 **Description**
 
-Converts a JSON-formatted `STRING` value to a `JSON` value.
+Converts a JSON-formatted `STRING` value to a [`JSON` value](https://www.json.org/json-en.html).
 
 Arguments:
 
@@ -25395,6 +25396,28 @@ SELECT PARSE_JSON('{"id": 922337203685477580701}', wide_number_mode=>'round') AS
  *------------------------------*/
 ```
 
+You can also use valid JSON-formatted strings that don't represent name/value pairs. For example:
+
+```sql
+SELECT PARSE_JSON('6') AS json_data;
+
+/*------------------------------*
+ | json_data                    |
+ +------------------------------+
+ | 6                            |
+ *------------------------------*/
+```
+
+```sql
+SELECT PARSE_JSON('"red"') AS json_data;
+
+/*------------------------------*
+ | json_data                    |
+ +------------------------------+
+ | "red"                        |
+ *------------------------------*/
+```
+
 ### `STRING` 
 <a id="string_for_json"></a>
 

diff --git a/docs/json_functions.md b/docs/json_functions.md
@@ -249,6 +249,7 @@ behavior:
       <td><a id="accessors"></a>Accessors</td>
       <td>
         
+        
         <a href="#json_type"><code>JSON_TYPE</code></a><br>
         
       </td>
@@ -6711,7 +6712,7 @@ PARSE_JSON(json_string_expr[, wide_number_mode=>{ 'exact' | 'round' }])
 
 **Description**
 
-Converts a JSON-formatted `STRING` value to a `JSON` value.
+Converts a JSON-formatted `STRING` value to a [`JSON` value](https://www.json.org/json-en.html).
 
 Arguments:
 
@@ -6782,6 +6783,28 @@ SELECT PARSE_JSON('{"id": 922337203685477580701}', wide_number_mode=>'round') AS
  *------------------------------*/
 ```
 
+You can also use valid JSON-formatted strings that don't represent name/value pairs. For example:
+
+```sql
+SELECT PARSE_JSON('6') AS json_data;
+
+/*------------------------------*
+ | json_data                    |
+ +------------------------------+
+ | 6                            |
+ *------------------------------*/
+```
+
+```sql
+SELECT PARSE_JSON('"red"') AS json_data;
+
+/*------------------------------*
+ | json_data                    |
+ +------------------------------+
+ | "red"                        |
+ *------------------------------*/
+```
+
 ### `STRING` 
 <a id="string_for_json"></a>
 

diff --git a/docs/resolved_ast.md b/docs/resolved_ast.md
@@ -164,6 +164,7 @@ See that file for comments on specific nodes and fields.
       <a href="#ResolvedDifferentialPrivacyAggregateScan">ResolvedDifferentialPrivacyAggregateScan</a>
     <a href="#ResolvedAnalyticScan">ResolvedAnalyticScan</a>
     <a href="#ResolvedArrayScan">ResolvedArrayScan</a>
+    <a href="#ResolvedBarrierScan">ResolvedBarrierScan</a>
     <a href="#ResolvedExecuteAsRoleScan">ResolvedExecuteAsRoleScan</a>
     <a href="#ResolvedFilterScan">ResolvedFilterScan</a>
     <a href="#ResolvedGroupRowsScan">ResolvedGroupRowsScan</a>
@@ -739,8 +740,28 @@ class ResolvedNonScalarFunctionCallBase : public <a href="#ResolvedFunctionCallB
 
 <p><pre><code class="lang-c++">
 <font color="brown">// An aggregate function call.  The signature always has mode AGGREGATE.
-// This node only ever shows up as the outer function call in a
-// ResolvedAggregateScan::aggregate_list.</font>
+//
+// FEATURE_V_1_4_MULTILEVEL_AGGREGATION enables multi-level aggregate
+// expressions (e.g. &#39;SUM(AVG(1 + X) GROUP BY key)&#39; ). The GROUP BY modifier
+// within an aggregate function body indicates the presence of a multi-level
+// aggregate expression.
+//
+// `group_by_aggregate_list` can only be present if `group_by_list` is
+// present. `group_by_list` and `group_by_aggregate_list` are mutually
+// exclusive with `having_modifier`.
+//
+// If `group_by_list` is empty, then standard column visibility rules apply
+// (i.e. columns supplied by input scan to the enclosing AggregateScan are
+// visible to argument expressions and aggregate function modifiers, as are
+// correlated columns).
+//
+// If `group_by_list` is non-empty, the initial aggregation is applied first,
+// computing the aggregate and grouping columns in `group_by_aggregate_list`
+// and `group_by_list`.  Only these computed columns (plus correlated columns)
+// are visible to argument expressions and aggregate function modifiers
+// (e.g. DISTINCT, IGNORE / RESPECT NULLS, LIMIT, ORDER BY). These
+// modifiers are applied on the output rows from the initial aggregation,
+// as input to the final aggregation.</font>
 class ResolvedAggregateFunctionCall : public <a href="#ResolvedNonScalarFunctionCallBase">ResolvedNonScalarFunctionCallBase</a> {
   static const ResolvedNodeKind TYPE = RESOLVED_AGGREGATE_FUNCTION_CALL;
 
@@ -768,6 +789,21 @@ class ResolvedAggregateFunctionCall : public <a href="#ResolvedNonScalarFunction
   // fully-resolved function body in context of the actual concrete
   // types of the arguments provided to the function call.</font>
   const std::shared_ptr&lt;<a href="#ResolvedFunctionCallInfo">ResolvedFunctionCallInfo</a>&gt;&amp; function_call_info() const;
+
+<font color="brown">  // Group the stream of input values by columns in this list, and
+  // compute the aggregates defined in `group_by_aggregate_list`.
+  // Used only for multi-level aggregation, when
+  // FEATURE_V_1_4_MULTILEVEL_AGGREGATION is enabled.</font>
+  const std::vector&lt;std::unique_ptr&lt;const <a href="#ResolvedComputedColumnBase">ResolvedComputedColumnBase</a>&gt;&gt;&amp; group_by_list() const;
+  int group_by_list_size() const;
+  const <a href="#ResolvedComputedColumnBase">ResolvedComputedColumnBase</a>* group_by_list(int i) const;
+
+<font color="brown">  // Aggregate columns to compute over the grouping keys defined in
+  // `group_by_list`. Used only for multi-level aggregation, when
+  // FEATURE_V_1_4_MULTILEVEL_AGGREGATION is enabled.</font>
+  const std::vector&lt;std::unique_ptr&lt;const <a href="#ResolvedComputedColumnBase">ResolvedComputedColumnBase</a>&gt;&gt;&amp; group_by_aggregate_list() const;
+  int group_by_aggregate_list_size() const;
+  const <a href="#ResolvedComputedColumnBase">ResolvedComputedColumnBase</a>* group_by_aggregate_list(int i) const;
 };
 </code></pre></p>
 
@@ -3167,7 +3203,7 @@ class ResolvedCreateIndexStmt : public <a href="#ResolvedCreateStatement">Resolv
 //   [OPTIONS (name=value, ...)]
 //
 //   CREATE [OR REPLACE] [TEMP|TEMPORARY|PUBLIC|PRIVATE] EXTERNAL SCHEMA
-//   [IF NOT EXISTS] &lt;name&gt; WITH CONNECTION &lt;connection&gt;
+//   [IF NOT EXISTS] &lt;name&gt; [WITH CONNECTION &lt;connection&gt;]
 //   OPTIONS (name=value, ...)
 //
 // &lt;option_list&gt; contains engine-specific options associated with the schema</font>
@@ -3209,7 +3245,7 @@ class ResolvedCreateSchemaStmt : public <a href="#ResolvedCreateSchemaStmtBase">
 <p><pre><code class="lang-c++">
 <font color="brown">// This statement:
 // CREATE [OR REPLACE] [TEMP|TEMPORARY|PUBLIC|PRIVATE] EXTERNAL SCHEMA
-// [IF NOT EXISTS] &lt;name&gt; WITH CONNECTION &lt;connection&gt;
+// [IF NOT EXISTS] &lt;name&gt; [WITH CONNECTION &lt;connection&gt;]
 // OPTIONS (name=value, ...)
 //
 // &lt;connection&gt; encapsulates engine-specific metadata used to connect
@@ -4457,9 +4493,9 @@ class ResolvedAnalyticFunctionGroup : public <a href="#ResolvedArgument">Resolve
 
   const <a href="#ResolvedWindowOrdering">ResolvedWindowOrdering</a>* order_by() const;
 
-  const std::vector&lt;std::unique_ptr&lt;const <a href="#ResolvedComputedColumn">ResolvedComputedColumn</a>&gt;&gt;&amp; analytic_function_list() const;
+  const std::vector&lt;std::unique_ptr&lt;const <a href="#ResolvedComputedColumnBase">ResolvedComputedColumnBase</a>&gt;&gt;&amp; analytic_function_list() const;
   int analytic_function_list_size() const;
-  const <a href="#ResolvedComputedColumn">ResolvedComputedColumn</a>* analytic_function_list(int i) const;
+  const <a href="#ResolvedComputedColumnBase">ResolvedComputedColumnBase</a>* analytic_function_list(int i) const;
 };
 </code></pre></p>
 
@@ -5230,7 +5266,7 @@ class ResolvedPrivilege : public <a href="#ResolvedArgument">ResolvedArgument</a
 <font color="brown">// Common superclass of GRANT/REVOKE statements.
 //
 // &lt;privilege_list&gt; is the list of privileges to be granted/revoked. ALL
-// PRIVILEGES should be granted/fromed if it is empty.
+// PRIVILEGES should be granted/revoked if it is empty.
 // &lt;object_type_list&gt; is an optional list of string identifiers, e.g., TABLE,
 // VIEW, MATERIALIZED VIEW.
 // &lt;name_path&gt; is a vector of segments of the object identifier&#39;s pathname.
@@ -7623,3 +7659,27 @@ class ResolvedIdentityColumnInfo : public <a href="#ResolvedArgument">ResolvedAr
 };
 </code></pre></p>
 
+### ResolvedBarrierScan
+<a id="ResolvedBarrierScan"></a>
+
+<p><pre><code class="lang-c++">
+<font color="brown">// ResolvedBarrierScan marks an optimization barrier during query planning.
+// It wraps an `input_scan` and ensures `input_scan` is evaluated as if
+// `input_scan` stands alone; plan transformations that may cause
+// different observable side effects may not cross the optimization barrier.
+//
+// The output rows of a ResolvedBarrierScan are the same as those of the
+// `input_scan`, propagating the `is_ordered` property of `input_scan`.
+//
+// The following optimizations are allowed:
+// * Prune an unused column of a ResolvedBarrierScan.
+// * Prune the whole ResolvedBarrierScan.
+//
+// This node does not have a corresponding syntax.</font>
+class ResolvedBarrierScan : public <a href="#ResolvedScan">ResolvedScan</a> {
+  static const ResolvedNodeKind TYPE = RESOLVED_BARRIER_SCAN;
+
+  const <a href="#ResolvedScan">ResolvedScan</a>* input_scan() const;
+};
+</code></pre></p>
+
diff --git a/docs/user-defined-functions.md b/docs/user-defined-functions.md
@@ -275,20 +275,20 @@ see [Modules][modules].
 
 ### Call a JavaScript UDF
 
-You can call a JavaScript UDF in the same way that you call a built-in
+You can call a JavaScript UDF the same way that you call a built-in
 function. For details, see [Function calls][function-calls].
 
 ### SQL type encodings in JavaScript 
 <a id="javascript_udf_data_types"></a>
 
 [ZetaSQL data types][data-types] represent
-[JavaScript data types][javascript-types] in the following manner:
+[JavaScript data types][javascript-types] as follows:
 
 <table>
   <thead>
     <tr>
-    <th>ZetaSQL<br/> Data Type</th>
-    <th>JavaScript<br/> Data Type</th>
+    <th>ZetaSQL<br/> data type</th>
+    <th>JavaScript<br/> data type</th>
     <th>Notes</th>
     </tr>
   </thead>
@@ -381,12 +381,9 @@ function. For details, see [Function calls][function-calls].
     <tr>
       <td>INT64</td>
       <td>
-        N/A
+        String
       </td>
       <td>
-        INT64 is unsupported as an input type for JavaScript UDFs. Instead,
-        use DOUBLE to represent integer values as a
-        number, or STRING to represent integer values as a string.
       </td>
     </tr>
 

diff --git a/zetasql/analyzer/resolver.h b/zetasql/analyzer/resolver.h
@@ -1853,6 +1853,12 @@ class Resolver {
       const ASTWhereClause* where_clause, const NameScope* from_scan_scope,
       std::unique_ptr<const ResolvedScan>* current_scan);
 
+  // Checks the ExprResolutionInfo of an expression resolved in this query for
+  // any features that are required or not allowed in this SelectForm.
+  absl::Status CheckExprResolutionInfoForQuery(
+      const ASTNode* ast_location, QueryResolutionInfo* query_resolution_info,
+      const ExprResolutionInfo& expr_resolution_info);
+
   // Performs first pass analysis on the SELECT list expressions against the
   // FROM clause. This pass includes star and dot-star expansion, but defers
   // resolution of expressions that use GROUP ROWS or GROUP BY modifiers (see

diff --git a/zetasql/analyzer/resolver_query.cc b/zetasql/analyzer/resolver_query.cc
@@ -3372,6 +3372,13 @@ absl::Status Resolver::ResolveSelectDotStar(
                                           query_resolution_info);
   ZETASQL_RETURN_IF_ERROR(
       ResolveExpr(ast_expr, &expr_resolution_info, &resolved_dotstar_expr));
+
+  // Check for any features required or not allowed in this expression.
+  // This applies the checks from ResolveSelectColumnFirstPass here too
+  // since ResolveSelectDotStar is an early exit in that function.
+  ZETASQL_RETURN_IF_ERROR(CheckExprResolutionInfoForQuery(
+      ast_expr, query_resolution_info, expr_resolution_info));
+
   const Type* source_type = resolved_dotstar_expr->type();
 
   std::unique_ptr<const ResolvedColumnRef> src_column_ref;
@@ -3600,6 +3607,13 @@ absl::Status Resolver::AddColumnFieldsToSelectList(
   return absl::OkStatus();
 }
 
+absl::Status Resolver::CheckExprResolutionInfoForQuery(
+    const ASTNode* ast_location, QueryResolutionInfo* query_resolution_info,
+    const ExprResolutionInfo& expr_resolution_info) {
+
+  return absl::OkStatus();
+}
+
 absl::Status Resolver::ResolveSelectColumnFirstPass(
     const ASTSelectColumn* ast_select_column, const NameScope* from_scan_scope,
     const std::shared_ptr<const NameList>& from_clause_name_list,
@@ -3650,6 +3664,12 @@ absl::Status Resolver::ResolveSelectColumnFirstPass(
   ZETASQL_RETURN_IF_ERROR(ResolveExpr(ast_select_expr, expr_resolution_info.get(),
                               &resolved_expr, inferred_type));
 
+  // Check for any features required or not allowed in this expression.
+  // Note that this also needs to happen inside the ResolveSelectDotStar
+  // call above that exits early.
+  ZETASQL_RETURN_IF_ERROR(CheckExprResolutionInfoForQuery(
+      ast_select_expr, query_resolution_info, *expr_resolution_info));
+
   // We can set is_explicit=true unconditionally because this either came
   // from an AS alias or from a path in the query, or it's an internal name
   // for an anonymous column (that can't be looked up).