Skip to content

Commit a0ab072

Browse files
authored
Adding support for BigQuery struct updates (#5849)
1 parent fdb0e48 commit a0ab072

12 files changed

+1108
-54
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Changes can also be flagged with a GitHub label for tracking purposes. The URL o
2828
- Attachments storage capabilities (S3 or local) [#5812](https://github.com/ethyca/fides/pull/5812) https://github.com/ethyca/fides/labels/db-migration
2929
- DB model support for Comments [#5833](https://github.com/ethyca/fides/pull/5833/files) https://github.com/ethyca/fides/labels/db-migration
3030
- Added UI for configuring website integrations and monitors [#5867](https://github.com/ethyca/fides/pull/5867)
31+
- Adding support for BigQuery struct updates [#5849](https://github.com/ethyca/fides/pull/5849)
3132

3233
### Changed
3334
- Bumped supported Python versions to `3.10.16` and `3.9.21` [#5840](https://github.com/ethyca/fides/pull/5840)

data/dataset/bigquery_example_test_dataset.yml

+20
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ dataset:
6464
fides_meta:
6565
identity: email
6666
data_type: string
67+
read_only: True
6768
- name: custom_id
6869
data_categories: [user.unique_id]
6970
fides_meta:
@@ -76,6 +77,24 @@ dataset:
7677
fides_meta:
7778
data_type: string
7879
length: 40
80+
- name: tags
81+
data_categories: [user]
82+
fides_meta:
83+
data_type: string[]
84+
- name: purchase_history
85+
fides_meta:
86+
data_type: object[]
87+
fields:
88+
- name: item_id
89+
data_categories: [system.operations]
90+
- name: price
91+
data_categories: [user.financial]
92+
- name: purchase_date
93+
data_categories: [system.operations]
94+
- name: item_tags
95+
data_categories: [user]
96+
fides_meta:
97+
data_type: string[]
7998

8099
- name: employee
81100
fides_meta:
@@ -256,6 +275,7 @@ dataset:
256275
- name: email
257276
data_categories: [user.contact.email]
258277
fides_meta:
278+
read_only: true
259279
identity: email
260280
data_type: string
261281
- name: last_visit
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
## Understanding the `BigQueryQueryConfig.generate_update` Method
2+
3+
### Example: Handling Nested Data and Arrays
4+
5+
Consider this original row in BigQuery:
6+
7+
```python
8+
# Original row data
9+
row = {
10+
"user_id": 123,
11+
"profile": {
12+
"name": "John Doe",
13+
        "email": "john.doe@example.com",
14+
"preferences": {
15+
"theme": "dark",
16+
"notifications": True
17+
},
18+
"tags": ["customer", "premium", "active"]
19+
},
20+
"activity": {
21+
"last_login": "2023-05-15",
22+
"login_count": 42
23+
}
24+
}
25+
```
26+
27+
And a masking update:
28+
29+
```python
30+
update_value_map = {
31+
"profile.email": "REDACTED",
32+
"profile.preferences.notifications": None,
33+
"profile.tags.0": None, # Null first tag
34+
"profile.tags.1": None, # Null second tag
35+
"profile.tags.2": None # Null third tag
36+
}
37+
```
38+
39+
### Step-by-Step Process
40+
41+
#### Step 1: Take `update_value_map` as-is
42+
```python
43+
# update_value_map with array indexes
44+
{
45+
"profile.email": "REDACTED",
46+
"profile.preferences.notifications": None,
47+
"profile.tags.0": None,
48+
"profile.tags.1": None,
49+
"profile.tags.2": None
50+
}
51+
```
52+
53+
#### Step 2: Flatten the row with array indexes
54+
```python
55+
flattened_row = {
56+
"user_id": 123,
57+
"profile.name": "John Doe",
58+
    "profile.email": "john.doe@example.com",
59+
"profile.preferences.theme": "dark",
60+
"profile.preferences.notifications": True,
61+
"profile.tags.0": "customer",
62+
"profile.tags.1": "premium",
63+
"profile.tags.2": "active",
64+
"activity.last_login": "2023-05-15",
65+
"activity.login_count": 42
66+
}
67+
```
68+
69+
#### Step 3: Merge `update_value_map` into `flattened_row`
70+
```python
71+
merged_dict = {
72+
"user_id": 123,
73+
"profile.name": "John Doe",
74+
"profile.email": "REDACTED", # From update_value_map
75+
"profile.preferences.theme": "dark",
76+
"profile.preferences.notifications": None, # From update_value_map
77+
"profile.tags.0": None, # From update_value_map
78+
"profile.tags.1": None, # From update_value_map
79+
"profile.tags.2": None, # From update_value_map
80+
"activity.last_login": "2023-05-15",
81+
"activity.login_count": 42
82+
}
83+
```
84+
85+
#### Step 4: Unflatten the merged dictionary
86+
```python
87+
nested_result = {
88+
"user_id": 123,
89+
"profile": {
90+
"name": "John Doe",
91+
"email": "REDACTED",
92+
"preferences": {
93+
"theme": "dark",
94+
"notifications": None
95+
},
96+
"tags": [None, None, None] # Reconstructed from indexed entries
97+
},
98+
"activity": {
99+
"last_login": "2023-05-15",
100+
"login_count": 42
101+
}
102+
}
103+
```
104+
105+
#### Step 5: Replace arrays containing only `None` values with empty arrays
106+
```python
107+
nested_result_with_arrays_fixed = {
108+
"user_id": 123,
109+
"profile": {
110+
"name": "John Doe",
111+
"email": "REDACTED",
112+
"preferences": {
113+
"theme": "dark",
114+
"notifications": None
115+
},
116+
"tags": [] # Converted from [None, None, None] to empty array
117+
},
118+
"activity": {
119+
"last_login": "2023-05-15",
120+
"login_count": 42
121+
}
122+
}
123+
```
124+
125+
#### Step 6: Only keep top-level keys that are in the `update_value_map`
126+
```python
127+
# Top-level keys in update_value_map are "profile" only
128+
top_level_keys = {"profile"}
129+
130+
final_update_map = {
131+
"profile": {
132+
"name": "John Doe",
133+
"email": "REDACTED",
134+
"preferences": {
135+
"theme": "dark",
136+
            "notifications": None
137+
},
138+
"tags": [] # Empty array
139+
}
140+
}
141+
```
142+
143+
#### Step 7: Create SQL Update statements
144+
This generates a SQLAlchemy Update object that would translate to:
145+
146+
```sql
147+
UPDATE `project_id.dataset_id.table_name`
148+
SET
149+
profile = {
150+
"name": "John Doe",
151+
"email": "REDACTED",
152+
"preferences": {
153+
"theme": "dark",
154+
"notifications": null
155+
},
156+
"tags": []
157+
}
158+
WHERE user_id = 123
159+
```
160+
161+
### Array Handling Notes
162+
163+
- Individual array elements can be targeted using indexed keys like `profile.tags.0`
164+
- When all elements in an array become `None`, it's automatically converted to an empty array `[]`
165+
166+
This approach ensures that complex nested JSON structures in BigQuery can be properly updated while maintaining their hierarchical structure.

src/fides/api/service/connectors/query_configs/bigquery_query_config.py

+46-15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import Any, Dict, List, Optional, Union, cast
22

3+
import pydash
34
from fideslang.models import MaskingStrategies
45
from loguru import logger
56
from sqlalchemy import MetaData, Table, text
@@ -17,7 +18,14 @@
1718
from fides.api.service.connectors.query_configs.query_config import (
1819
QueryStringWithoutTuplesOverrideQueryConfig,
1920
)
20-
from fides.api.util.collection_util import Row, filter_nonempty_values
21+
from fides.api.util.collection_util import (
22+
Row,
23+
filter_nonempty_values,
24+
flatten_dict,
25+
merge_dicts,
26+
replace_none_arrays,
27+
unflatten_dict,
28+
)
2129

2230

2331
class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
@@ -120,18 +128,45 @@ def generate_update(
120128
A List of multiple Update objects are returned for partitioned tables; for a non-partitioned table,
121129
a single Update object is returned in a List for consistent typing.
122130
123-
TODO: DRY up this method and `generate_delete` a bit
131+
This implementation handles nested fields by grouping them as JSON objects rather than
132+
individual field updates.
133+
134+
See the README.md in this directory for a detailed example of how nested data is handled.
124135
"""
136+
137+
# 1. Take update_value_map as-is (already flattened)
125138
update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)
139+
140+
# 2. Flatten the row
141+
flattened_row = flatten_dict(row)
142+
143+
# 3. Merge flattened_row with update_value_map (update_value_map takes precedence)
144+
merged_dict = merge_dicts(flattened_row, update_value_map)
145+
146+
# 4. Unflatten the merged dictionary
147+
nested_result = unflatten_dict(merged_dict)
148+
149+
# 5. Replace any arrays containing only None values with empty arrays
150+
nested_result = replace_none_arrays(nested_result) # type: ignore
151+
152+
# 6. Only keep top-level keys that are in the update_value_map
153+
top_level_keys = {key.split(".")[0] for key in update_value_map}
154+
155+
# Filter the nested result to only include those top-level keys
156+
final_update_map = {
157+
k: v for k, v in nested_result.items() if k in top_level_keys
158+
}
159+
160+
# Use existing non-empty reference fields mechanism for WHERE clause
126161
non_empty_reference_field_keys: Dict[str, Field] = filter_nonempty_values(
127162
{
128-
fpath.string_path: fld.cast(row[fpath.string_path])
163+
fpath.string_path: fld.cast(pydash.get(row, fpath.string_path))
129164
for fpath, fld in self.reference_field_paths.items()
130-
if fpath.string_path in row
165+
if pydash.get(row, fpath.string_path) is not None
131166
}
132167
)
133168

134-
valid = len(non_empty_reference_field_keys) > 0 and update_value_map
169+
valid = len(non_empty_reference_field_keys) > 0 and final_update_map
135170
if not valid:
136171
logger.warning(
137172
"There is not enough data to generate a valid update statement for {}",
@@ -154,12 +189,12 @@ def generate_update(
154189
partitioned_queries.append(
155190
table.update()
156191
.where(*(where_clauses + [text(partition_clause)]))
157-
.values(**update_value_map)
192+
.values(**final_update_map)
158193
)
159194

160195
return partitioned_queries
161196

162-
return [table.update().where(*where_clauses).values(**update_value_map)]
197+
return [table.update().where(*where_clauses).values(**final_update_map)]
163198

164199
def generate_delete(self, row: Row, client: Engine) -> List[Delete]:
165200
"""Returns a List of SQLAlchemy DELETE statements for BigQuery. Does not actually execute the delete statement.
@@ -213,18 +248,14 @@ def format_fields_for_query(
213248
self,
214249
field_paths: List[FieldPath],
215250
) -> List[str]:
216-
"""Returns field paths in a format they can be added into SQL queries.
217-
251+
"""
252+
Returns field paths in a format they can be added into SQL queries.
218253
Only returns non-nested fields (fields with exactly one level).
219-
Nested fields are skipped with a warning log.
220254
"""
255+
221256
formatted_fields = []
222257
for field_path in field_paths:
223-
if len(field_path.levels) > 1:
224-
logger.warning(
225-
f"Skipping nested field '{'.'.join(field_path.levels)}' as nested fields are not supported"
226-
)
227-
else:
258+
if len(field_path.levels) == 1:
228259
formatted_fields.append(field_path.levels[0])
229260
return formatted_fields
230261

0 commit comments

Comments
 (0)