Skip to content

Commit

Permalink
Destination: BigQuery (denormalized typed struct) - Could not evaluat…
Browse files Browse the repository at this point in the history
…e union, field is expected to be one of these: NULL, STRING. (#16401)

* Fixed bucket naming for S3

* removed redundant configs

* BigQuery Denormalized: Could not evaluate union on Mongo formatted type

* add test

* fixed BigQuery tests

* updated changelog

* replaced star import

* bump version

* auto-bump connector version [ci skip]

* fixed compilation error

* bump mongodb-strict-encrypt

* auto-bump connector version [ci skip]

* removed unused method

Co-authored-by: Octavia Squidington III <[email protected]>
  • Loading branch information
VitaliiMaltsev and octavia-squidington-iii authored Sep 13, 2022
1 parent 8a126e6 commit 022b8db
Show file tree
Hide file tree
Showing 13 changed files with 70 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
- name: BigQuery (denormalized typed struct)
destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496
dockerRepository: airbyte/destination-bigquery-denormalized
dockerImageTag: 1.2.0
dockerImageTag: 1.2.1
documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery
icon: bigquery.svg
resourceRequirements:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@
- "overwrite"
- "append"
- "append_dedup"
- dockerImage: "airbyte/destination-bigquery-denormalized:1.2.0"
- dockerImage: "airbyte/destination-bigquery-denormalized:1.2.1"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@
- name: MongoDb
sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e
dockerRepository: airbyte/source-mongodb-v2
dockerImageTag: 0.1.16
dockerImageTag: 0.1.17
documentationUrl: https://docs.airbyte.io/integrations/sources/mongodb-v2
icon: mongodb.svg
sourceType: database
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6356,7 +6356,7 @@
path_in_connector_config:
- "credentials"
- "client_secret"
- dockerImage: "airbyte/source-mongodb-v2:0.1.16"
- dockerImage: "airbyte/source-mongodb-v2:0.1.17"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class MongoUtils {

private static final String MISSING_TYPE = "missing";
private static final String NULL_TYPE = "null";
private static final String AIRBYTE_SUFFIX = "_aibyte_transform";
public static final String AIRBYTE_SUFFIX = "_aibyte_transform";
private static final int DISCOVER_LIMIT = 10000;
private static final String ID = "_id";

Expand Down Expand Up @@ -136,11 +136,12 @@ private static ObjectNode readDocument(final BsonReader reader, final ObjectNode
return jsonNodes;
}

private static void transformToStringIfMarked(final ObjectNode jsonNodes, final List<String> columnNames, final String fieldName) {
public static void transformToStringIfMarked(final ObjectNode jsonNodes, final List<String> columnNames, final String fieldName) {
if (columnNames.contains(fieldName + AIRBYTE_SUFFIX)) {
final JsonNode data = jsonNodes.get(fieldName);
if (data != null) {
jsonNodes.put(fieldName, data.asText());
jsonNodes.remove(fieldName);
jsonNodes.put(fieldName + AIRBYTE_SUFFIX, data.isTextual() ? data.asText() : data.toString());
} else {
LOGGER.debug("WARNING Field list out of sync, Document doesn't contain field: {}", fieldName);
}
Expand Down
43 changes: 43 additions & 0 deletions airbyte-db/db-lib/src/test/java/io/airbyte/db/MongoUtilsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.db;

import static io.airbyte.db.mongodb.MongoUtils.AIRBYTE_SUFFIX;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.commons.json.Jsons;
import io.airbyte.db.mongodb.MongoUtils;
import java.util.List;
import org.junit.jupiter.api.Test;

class MongoUtilsTest {

@Test
void testTransformToStringIfMarked() {
final List<String> columnNames = List.of("_id", "createdAt", "connectedWallets", "connectedAccounts_aibyte_transform");
final String fieldName = "connectedAccounts";
final JsonNode node = Jsons.deserialize(
"{\"_id\":\"12345678as\",\"createdAt\":\"2022-11-11 12:13:14\",\"connectedWallets\":\"wallet1\"," +
"\"connectedAccounts\":" +
"{\"google\":{\"provider\":\"google\",\"refreshToken\":\"test-rfrsh-google-token-1\",\"accessToken\":\"test-access-google-token-1\",\"expiresAt\":\"2020-09-01T21:07:00Z\",\"createdAt\":\"2020-09-01T20:07:01Z\"},"
+
"\"figma\":{\"provider\":\"figma\",\"refreshToken\":\"test-rfrsh-figma-token-1\",\"accessToken\":\"test-access-figma-token-1\",\"expiresAt\":\"2020-12-13T22:08:03Z\",\"createdAt\":\"2020-09-14T22:08:03Z\",\"figmaInfo\":{\"teamID\":\"501087711831561793\"}},"
+
"\"slack\":{\"provider\":\"slack\",\"accessToken\":\"test-access-slack-token-1\",\"createdAt\":\"2020-09-01T20:15:07Z\",\"slackInfo\":{\"userID\":\"UM5AD2YCE\",\"teamID\":\"T2VGY5GH5\"}}}}");
assertTrue(node.get(fieldName).isObject());

MongoUtils.transformToStringIfMarked((ObjectNode) node, columnNames, fieldName);

assertNull(node.get(fieldName));
assertNotNull(node.get(fieldName + AIRBYTE_SUFFIX));
assertTrue(node.get(fieldName + AIRBYTE_SUFFIX).isTextual());

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.2.0
LABEL io.airbyte.version=1.2.1
LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Field.Builder;
import com.google.cloud.bigquery.Field.Mode;
import com.google.cloud.bigquery.FieldList;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.QueryParameterValue;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
Expand Down Expand Up @@ -168,6 +170,17 @@ private JsonNode getArrayNode(final FieldList fields, final JsonNode root) {

private JsonNode getObjectNode(final FieldList fields, final JsonNode root) {
final List<String> fieldNames = fields.stream().map(Field::getName).collect(Collectors.toList());

fields.stream()
.filter(f -> f.getType().equals(LegacySQLTypeName.STRING))
.filter(field -> root.get(field.getName()) != null)
.filter(f -> root.get(f.getName()).isObject())
.forEach(f -> {
final String value = root.get(f.getName()).toString();
((ObjectNode) root).remove(f.getName());
((ObjectNode) root).put(f.getName(), new TextNode(value));
});

return Jsons.jsonNode(Jsons.keys(root).stream()
.filter(key -> {
final boolean validKey = fieldNames.contains(namingResolver.getIdentifier(key));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ void testJsonReferenceDefinition() throws Exception {
retrieveRecordsAsJson(USERS_STREAM_NAME).stream().flatMap(x -> extractJsonValues(x, "users").stream()).collect(Collectors.toSet());

final Set<String> expected = Sets.set(
"{\"name\":\"John\",\"surname\":\"Adams\"}",
"\"{\\\"name\\\":\\\"John\\\",\\\"surname\\\":\\\"Adams\\\"}\"",
null // we expect one record to have not had the users field set
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-mongodb-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.1.9
LABEL io.airbyte.version=0.1.10
LABEL io.airbyte.name=airbyte/source-mongodb-strict-encrypt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-mongodb-v2

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.1.16
LABEL io.airbyte.version=0.1.17
LABEL io.airbyte.name=airbyte/source-mongodb-v2
1 change: 1 addition & 0 deletions docs/integrations/destinations/bigquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ Now that you have set up the BigQuery destination connector, check out the follo

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:----------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|
| 1.2.1 | 2022-09-10 | [16401](https://github.com/airbytehq/airbyte/pull/16401) | Wrapping string objects with TextNode |
| 1.2.0 | 2022-09-09 | [#14023](https://github.com/airbytehq/airbyte/pull/14023) | Cover arrays only if they are nested |
| 1.1.16 | 2022-09-01 | [#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields) |
| 1.1.15 | 2022-08-03 | [14784](https://github.com/airbytehq/airbyte/pull/14784) | Enabling Application Default Credentials |
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/mongodb-v2.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ For more information regarding configuration parameters, please see [MongoDb Doc

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.1.17 | 2022-09-08 | [16401](https://github.com/airbytehq/airbyte/pull/16401) | Fixed bug with empty strings in fields with __aibyte_transform_ |
| 0.1.16 | 2022-08-18 | [14356](https://github.com/airbytehq/airbyte/pull/14356) | DB Sources: only show a table can sync incrementally if at least one column can be used as a cursor field |
| 0.1.15 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors |
| 0.1.14 | 2022-05-05 | [12428](https://github.com/airbytehq/airbyte/pull/12428) | JsonSchema: Add properties to fields with type 'object' |
Expand Down

0 comments on commit 022b8db

Please sign in to comment.