diff --git a/README.md b/README.md
index 4655a4f..372b014 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
# Overview
-Following plugins are available in this repository.
+The following plugins are available in this repository.
* Image Extractor Transform
* Document Extractor Transform
diff --git a/checkstyle.xml b/checkstyle.xml
index dca5d9d..48560de 100644
--- a/checkstyle.xml
+++ b/checkstyle.xml
@@ -55,7 +55,7 @@ page at http://checkstyle.sourceforge.net/config.html -->
-
@@ -115,7 +115,7 @@ page at http://checkstyle.sourceforge.net/config.html -->
-
+
+
- io.cdap.plugin
- hydrator-common
- ${hydrator.version}
+ com.google.protobuf
+ protobuf-java
+ ${google.protobuf.version}
- org.apache.hadoop
- hadoop-common
- ${hadoop.version}
- provided
+ com.google.cloud
+ google-cloud-vision
+ ${google.cloud.vision.version}
- commons-logging
- commons-logging
-
-
- log4j
- log4j
-
-
- org.slf4j
- slf4j-log4j12
-
-
- org.apache.avro
- avro
+ com.google.code.findbugs
+ jsr305
- org.apache.zookeeper
- zookeeper
+ javax.annotation
+ javax.annotation-api
- com.google.guava
- guava
+ org.apache.hbase
+ hbase-shaded-client
- jersey-core
- com.sun.jersey
-
-
- jersey-json
- com.sun.jersey
-
-
- jersey-server
- com.sun.jersey
-
-
- servlet-api
- javax.servlet
-
-
- org.mortbay.jetty
- jetty
-
-
- org.mortbay.jetty
- jetty-util
-
-
- jasper-compiler
- tomcat
-
-
- jasper-runtime
- tomcat
-
-
- jsp-api
- javax.servlet.jsp
-
-
- slf4j-api
- org.slf4j
-
-
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- ${hadoop.version}
- provided
-
-
- org.slf4j
- slf4j-log4j12
+ org.apache.hbase
+ hbase-shaded-server
- com.google.inject.extensions
- guice-servlet
-
-
- com.sun.jersey
- jersey-core
+ org.apache.avro
+ avro
- com.sun.jersey
- jersey-server
+ com.google.api.grpc
+ proto-google-cloud-vision-v1p1beta1
- com.sun.jersey
- jersey-json
+ com.google.api.grpc
+ proto-google-cloud-vision-v1p3beta1
- com.sun.jersey.contribs
- jersey-guice
+ com.google.api.grpc
+ proto-google-cloud-vision-v1p2beta1
- javax.servlet
- servlet-api
+ com.google.api.grpc
+ proto-google-cloud-vision-v1p4beta1
com.google.guava
guava
- com.google.inject
- guice
+ com.google.protobuf
+ protobuf-java
-
- com.google.protobuf
- protobuf-java
- ${google.protobuf.version}
-
-
- com.google.cloud
- google-cloud-vision
- ${google.cloud.vision.version}
-
com.google.cloud
- google-cloud-vision
- ${google.cloud.vision.version}
-
-
- com.google.guava
- guava
- ${guava.version}
-
-
- com.google.guava
- guava
- ${guava.version}
-
-
- commons-codec
- commons-codec
- ${common.codec.version}
-
-
- com.google.code.gson
- gson
- ${gson.version}
+ google-cloud-storage
+ ${google.cloud.storage.version}
+
+
+
- com.fasterxml.jackson.core
- jackson-databind
- ${jackson.core.version}
+ io.cdap.plugin
+ google-cloud
+ ${google.cloud.version}
+ test
-
io.cdap.cdap
hydrator-test
@@ -309,31 +221,16 @@
${junit.version}
test
-
- org.mockito
- mockito-all
- ${mockito.version}
- test
-
+
org.apache.httpcomponents
httpclient
${httpcomponents.version}
test
-
- com.github.tomakehurst
- wiremock
- ${wiremock.version}
- test
-
-
-
-
-
org.apache.maven.plugins
@@ -352,7 +249,7 @@
-Xmx3g -Djava.awt.headless=true -XX:MaxPermSize=256m -XX:+UseConcMarkSweepGC
-Djava.net.preferIPv4Stack=true
- ${surefire.redirectTestOutputToFile}
+ ${project.surefire.redirectTestOutputToFile}
false
plain
@@ -405,6 +302,7 @@
LICENSE*.txt
+ **/*.txt
*.rst
*.md
**/*.cdap
@@ -458,7 +356,7 @@
com.puppycrawl.tools
checkstyle
- 8.18
+ 6.19
@@ -482,6 +380,29 @@
+
+ org.apache.maven.plugins
+ maven-enforcer-plugin
+ 3.0.0-M3
+
+
+ enforce-version
+
+ enforce
+
+
+
+
+
+
+ log4j:log4j:[0.0,1.2.17)
+
+
+
+
+
+
+
org.apache.felix
maven-bundle-plugin
@@ -490,7 +411,22 @@
<_exportcontents>
- io.cdap.plugin.cloud.vision.*
+ com.google.api,com.google.api.client.*,com.google.api.core,com.google.api.gax.batching,
+ com.google.api.gax.core,com.google.api.gax.grpc,com.google.api.gax.longrunning,
+ com.google.api.gax.paging,com.google.api.gax.retrying,com.google.api.gax.rpc,
+ com.google.api.gax.tracing,com.google.api.resourcenames,
+ com.google.api.services.storage.model,com.google.auth.*,com.google.cloud,
+ com.google.cloud.http,com.google.cloud.spi,com.google.cloud.storage,
+ com.google.cloud.storage.spi,com.google.cloud.storage.spi.v1,com.google.cloud.vision.v1.*,
+ com.google.common.base,com.google.common.collect,com.google.common.graph,
+ com.google.common.hash,com.google.common.io,com.google.common.util.concurrent,
+ com.google.common.util.concurrent.internal,com.google.errorprone.annotations,
+ com.google.iam.v1,com.google.longrunning,com.google.longrunning.stub,com.google.protobuf.*,
+ com.google.rpc,com.google.type,io.cdap.plugin.cloud.vision.*,io.grpc,io.grpc.stub,
+ io.opencensus.common,io.opencensus.trace,io.opencensus.trace.config,
+ io.opencensus.trace.export,io.opencensus.trace.propagation,
+ org.checkerframework.checker.nullness.compatqual,org.checkerframework.checker.nullness.qual,
+ org.checkerframework.framework.qual,org.threeten.bp,org.threeten.bp.*
*;inline=false;scope=compile
true
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConfig.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConstants.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/CredentialsHelper.java b/src/main/java/io/cdap/plugin/cloud/vision/CredentialsHelper.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java
old mode 100644
new mode 100755
index 3c9e84d..64a14eb
--- a/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java
@@ -19,40 +19,46 @@
/**
* Cloud Vision Action constants.
*/
-public class ActionConstants {
+public final class ActionConstants {
/**
* Configuration property name used to specify source path.
*/
public static final String SOURCE_PATH = "sourcePath";
-
/**
* Configuration property name used to specify destination path.
*/
public static final String DESTINATION_PATH = "destinationPath";
-
/**
* Configuration property name used to specify batch size.
*/
public static final String BATCH_SIZE = "batchSize";
-
/**
* Configuration property name used to specify the features to extract from images.
*/
public static final String FEATURES = "features";
-
/**
* Configuration property name used to specify optional hints.
*/
public static final String LANGUAGE_HINTS = "languageHints";
-
/**
* Configuration property name used to specify aspect ratios.
*/
public static final String ASPECT_RATIOS = "aspectRatios";
-
/**
* Configuration property name used to specify includeGeoResults.
*/
public static final String INCLUDE_GEO_RESULTS = "includeGeoResults";
+ /**
+ * Maximum number of Images that can be send at a time to the Cloud Vision API
+ *
+ * @see bath information
+ */
+ public static final int MAX_NUMBER_OF_IMAGES_PER_BATCH = 2000;
+
+ /**
+ * Prevent instantiation.
+ */
+ private ActionConstants() {
+ }
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java b/src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java
new file mode 100644
index 0000000..f9b2a77
--- /dev/null
+++ b/src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.cloud.vision.action;
+
+import com.google.auth.Credentials;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Bucket;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Helper class used to retrieve a list of Blobs from a GCSPath.
+ */
+
+public class GcsBucketHelper {
+ /**
+ * Helper function that returns a List of Blobs that are found in a given path on GCS.
+ *
+ * @param sourceFolderPath
+ * @param credentials
+ * @return List of Blobs
+ */
+ public static List getAllFilesInPath(String sourceFolderPath, Credentials credentials) {
+ List results = new ArrayList<>();
+
+ Storage storage = StorageOptions.newBuilder().setCredentials(credentials)
+ .build().getService();
+
+ // Loop through the buckets
+ for (Bucket currentBucket : storage.list().iterateAll()) {
+ // Loop though the blobs and check if the path is what we want, based on sourceFolderPath
+ for (Blob blob : currentBucket.list().iterateAll()) {
+ if (blob.getName().endsWith("/")) { // It's a "folder"
+ continue; // Ignore
+ }
+ // Rebuild the full path of the blob
+ String fullBlobPath = "gs://" + blob.getBucket() + "/" + blob.getName();
+ // Is the blob part of the path we have been given?
+ if (fullBlobPath.startsWith(sourceFolderPath)) {
+ results.add(blob);
+ }
+ }
+ }
+ return results;
+ }
+}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java
old mode 100644
new mode 100755
index a8da8a3..ac5e803
--- a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java
@@ -18,6 +18,7 @@
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.auth.Credentials;
+import com.google.cloud.storage.Blob;
import com.google.cloud.vision.v1.AnnotateImageRequest;
import com.google.cloud.vision.v1.AsyncBatchAnnotateImagesRequest;
import com.google.cloud.vision.v1.CropHintsParams;
@@ -39,23 +40,31 @@
import io.cdap.cdap.etl.api.action.Action;
import io.cdap.cdap.etl.api.action.ActionContext;
import io.cdap.plugin.cloud.vision.CredentialsHelper;
-
-import java.util.Arrays;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nullable;
+import static io.cdap.plugin.cloud.vision.action.ActionConstants.MAX_NUMBER_OF_IMAGES_PER_BATCH;
/**
* Action that runs offline image extractor.
*/
@Plugin(type = Action.PLUGIN_TYPE)
-@Name(OfflineImageExtractorAction.NAME)
+@Name(OfflineImageExtractorAction.PLUGIN_NAME)
@Description("Action that runs offline image extractor.")
public class OfflineImageExtractorAction extends Action {
- public static final String NAME = "OfflineImageExtractor";
-
+ public static final String PLUGIN_NAME = "OfflineImageExtractor";
private final OfflineImageExtractorActionConfig config;
+ private static final Logger LOG = LoggerFactory.getLogger(OfflineImageExtractorAction.class);
public OfflineImageExtractorAction(OfflineImageExtractorActionConfig config) {
+ if (config.getSourcePath() != null) {
+ config.setSourcePath(config.getSourcePath().trim()); // Remove whitespace
+ }
+ if (config.getDestinationPath() != null) {
+ config.setDestinationPath(config.getDestinationPath().trim()); // Remove whitespace
+ }
this.config = config;
}
@@ -63,6 +72,7 @@ public OfflineImageExtractorAction(OfflineImageExtractorActionConfig config) {
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector();
config.validate(collector);
+ collector.getOrThrowException();
}
@Override
@@ -73,53 +83,88 @@ public void run(ActionContext actionContext) throws Exception {
Credentials credentials = CredentialsHelper.getCredentials(config.getServiceFilePath());
- ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder()
- .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
+ // Destination in GCS where the results will be stored
+ String destinationPath = config.getDestinationPath();
+ // Add a '/' at the end if it's not already there
+ if (!destinationPath.endsWith("/")) {
+ destinationPath += "/";
+ }
+ GcsDestination gcsDestination = GcsDestination.newBuilder()
+ .setUri(destinationPath)
.build();
- try (ImageAnnotatorClient imageAnnotatorClient = ImageAnnotatorClient.create(imageAnnotatorSettings)) {
-
- ImageSource source = ImageSource.newBuilder().setImageUri(config.getSourcePath()).build();
- Image image = Image.newBuilder().setSource(source).build();
+ OutputConfig outputConfig = OutputConfig.newBuilder()
+ .setGcsDestination(gcsDestination)
+ .setBatchSize(config.getBatchSizeValue())
+ .build();
- List features =
- Arrays.asList(Feature.newBuilder().setType(config.getImageFeature().getFeatureType()).build());
- AnnotateImageRequest.Builder builder =
- AnnotateImageRequest.newBuilder().setImage(image).addAllFeatures(features);
+ ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder()
+ .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
+ .build();
- ImageContext imageContext = getImageContext();
- if (imageContext != null) {
- builder.setImageContext(imageContext);
- }
+ // Get all the blobs in the source path
+ List blobs = GcsBucketHelper.getAllFilesInPath(config.getSourcePath(), credentials);
+ if (blobs.isEmpty()) {
+ LOG.warn("Nothing found to process in path: " + config.getSourcePath());
+ return;
+ }
- AnnotateImageRequest requestsElement = builder.build();
- List requests = Arrays.asList(requestsElement);
- GcsDestination gcsDestination = GcsDestination.newBuilder().setUri(config.getDestinationPath()).build();
+ // Prepare the list of requests
+ List imageRequests = new ArrayList<>(blobs.size());
- // The max number of responses to output in each JSON file
- int batchSize = config.getBatchSizeValue();
- OutputConfig outputConfig =
- OutputConfig.newBuilder()
- .setGcsDestination(gcsDestination)
- .setBatchSize(batchSize)
- .build();
+ // Feature we are going to ask for
+ Feature feature = Feature.newBuilder()
+ .setType(config.getImageFeature().getFeatureType())
+ .build();
- AsyncBatchAnnotateImagesRequest asyncRequest =
- AsyncBatchAnnotateImagesRequest.newBuilder()
- .addAllRequests(requests)
+ try (ImageAnnotatorClient imageAnnotatorClient = ImageAnnotatorClient.create(imageAnnotatorSettings)) {
+ // Create batches of images to send for processing
+ // We need to do this because there is a limit on the vision API that will raise an error if there are more
+ // than MAX_NUMBER_OF_IMAGES_PER_BATCH in a single batch
+ for (int batchId = 0;
+ batchId < (1 + blobs.size() / MAX_NUMBER_OF_IMAGES_PER_BATCH);
+ batchId++) {
+ for (int index = batchId * MAX_NUMBER_OF_IMAGES_PER_BATCH;
+ (index < (batchId + 1) * MAX_NUMBER_OF_IMAGES_PER_BATCH) && (index < blobs.size());
+ index++) {
+ Blob blob = blobs.get(index);
+ // Rebuild the full path of the blob
+ String fullBlobPath = "gs://" + blob.getBucket() + "/" + blob.getName();
+
+ ImageSource imageSource = ImageSource.newBuilder().setImageUri(fullBlobPath).build();
+
+ Image image = Image.newBuilder().setSource(imageSource).build();
+
+ AnnotateImageRequest.Builder builder =
+ AnnotateImageRequest.newBuilder().setImage(image).addFeatures(feature);
+
+ ImageContext imageContext = getImageContext();
+ if (imageContext != null) {
+ builder.setImageContext(imageContext);
+ }
+
+ AnnotateImageRequest annotateImageRequest = builder.build();
+ imageRequests.add(annotateImageRequest);
+ }
+
+ // Send the requests
+ AsyncBatchAnnotateImagesRequest asyncRequest = AsyncBatchAnnotateImagesRequest.newBuilder()
+ .addAllRequests(imageRequests)
.setOutputConfig(outputConfig)
.build();
- imageAnnotatorClient.asyncBatchAnnotateImagesAsync(asyncRequest)
- .getInitialFuture()
- .get();
+ // Wait for the future to complete
+ imageAnnotatorClient.asyncBatchAnnotateImagesAsync(asyncRequest)
+ .getInitialFuture()
+ .get();
+ }
} catch (Exception exception) {
throw new IllegalStateException(exception);
}
}
@Nullable
- private ImageContext getImageContext() {
+ protected ImageContext getImageContext() {
switch (config.getImageFeature()) {
case TEXT:
return Strings.isNullOrEmpty(config.getLanguageHints()) ? null
@@ -138,5 +183,4 @@ private ImageContext getImageContext() {
return null;
}
}
-
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java
old mode 100644
new mode 100755
index 9fa674c..89cddd1
--- a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java
@@ -24,7 +24,6 @@
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.plugin.cloud.vision.CloudVisionConstants;
import io.cdap.plugin.cloud.vision.transform.ImageFeature;
-
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -33,7 +32,7 @@
import javax.annotation.Nullable;
/**
- * Configuration for OfflineImageExtractorAction
+ * Configuration for OfflineImageExtractorAction.
*/
public class OfflineImageExtractorActionConfig extends PluginConfig {
@@ -86,8 +85,17 @@ public OfflineImageExtractorActionConfig(@Nullable String serviceFilePath, Strin
@Nullable Boolean includeGeoResults, String sourcePath,
String destinationPath, @Nullable String batchSize) {
this.serviceFilePath = serviceFilePath;
+
+ if (sourcePath != null) {
+ sourcePath = sourcePath.trim();
+ }
this.sourcePath = sourcePath;
+
+ if (destinationPath != null) {
+ destinationPath = destinationPath.trim();
+ }
this.destinationPath = destinationPath;
+
this.features = features;
this.batchSize = batchSize;
this.languageHints = languageHints;
@@ -97,8 +105,8 @@ public OfflineImageExtractorActionConfig(@Nullable String serviceFilePath, Strin
private OfflineImageExtractorActionConfig(Builder builder) {
serviceFilePath = builder.serviceFilePath;
- sourcePath = builder.sourcePath;
- destinationPath = builder.destinationPath;
+ sourcePath = builder.sourcePath.trim();
+ destinationPath = builder.destinationPath.trim();
features = builder.features;
batchSize = builder.batchSize;
languageHints = builder.languageHints;
@@ -106,10 +114,21 @@ private OfflineImageExtractorActionConfig(Builder builder) {
includeGeoResults = builder.includeGeoResults;
}
+ /**
+ * Getter to retrieve a Builder object.
+ *
+ * @return a Builder object.
+ */
public static Builder builder() {
return new Builder();
}
+ /**
+ * Helper function to get a Builder object based on an existing configuration.
+ *
+ * @param copy Configuration object to use as the source to copy from.
+ * @return Builer object.
+ */
public static Builder builder(OfflineImageExtractorActionConfig copy) {
return builder()
.setServiceFilePath(copy.getServiceFilePath())
@@ -175,6 +194,12 @@ public boolean getIncludeGeoResults() {
return includeGeoResults != null ? includeGeoResults : false;
}
+ /**
+ * Slit a comma separated list of properties and turn them into a List.
+ *
+ * @param property Comma separated list of properties.
+ * @return A list of strings after splitting on commas.
+ */
private List convertPropertyToList(String property) {
if (!Strings.isNullOrEmpty(property)) {
return Arrays.asList(property.split(","));
@@ -183,6 +208,11 @@ private List convertPropertyToList(String property) {
}
}
+ /**
+ * Validate that the configuration is correct. If not, use the FailureCollector object passed to report errors.
+ *
+ * @param collector FailureCollector object to use to report errors.
+ */
public void validate(FailureCollector collector) {
ImageFeature feature = getImageFeature();
if (feature == null) {
@@ -196,7 +226,7 @@ public void validate(FailureCollector collector) {
batch = Integer.parseInt(batchSize);
} catch (NumberFormatException e) {
collector.addFailure(String.format("Incorrect value '%s' for Batch Size.", batchSize),
- "Provide correct value.")
+ "Provide correct value.")
.withConfigProperty(ActionConstants.BATCH_SIZE);
return;
}
@@ -219,8 +249,16 @@ public void validate(FailureCollector collector) {
}
}
+ public void setSourcePath(String sourcePath) {
+ this.sourcePath = sourcePath;
+ }
+
+ public void setDestinationPath(String destinationPath) {
+ this.destinationPath = destinationPath;
+ }
+
/**
- * Builder for creating a {@link OfflineImageExtractorActionConfig}
+ * Builder for creating a {@link OfflineImageExtractorActionConfig}.
*/
public static final class Builder {
@Nullable
@@ -241,12 +279,20 @@ private Builder() {
}
public Builder setSourcePath(String sourcePath) {
- this.sourcePath = sourcePath;
+ if (sourcePath != null) {
+ this.sourcePath = sourcePath.trim();
+ } else {
+ this.sourcePath = sourcePath;
+ }
return this;
}
public Builder setDestinationPath(String destinationPath) {
- this.destinationPath = destinationPath;
+ if (destinationPath != null) {
+ this.destinationPath = destinationPath.trim();
+ } else {
+ this.destinationPath = destinationPath;
+ }
return this;
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java
new file mode 100644
index 0000000..6d9ea7a
--- /dev/null
+++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java
@@ -0,0 +1,168 @@
+/*
+ * Copyright © 2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.cloud.vision.action;
+
+import com.google.api.gax.core.FixedCredentialsProvider;
+import com.google.auth.Credentials;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.vision.v1.AsyncAnnotateFileRequest;
+import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesRequest;
+import com.google.cloud.vision.v1.Feature;
+import com.google.cloud.vision.v1.GcsDestination;
+import com.google.cloud.vision.v1.GcsSource;
+import com.google.cloud.vision.v1.ImageAnnotatorClient;
+import com.google.cloud.vision.v1.ImageAnnotatorSettings;
+import com.google.cloud.vision.v1.ImageContext;
+import com.google.cloud.vision.v1.InputConfig;
+import com.google.cloud.vision.v1.OutputConfig;
+import io.cdap.cdap.api.annotation.Description;
+import io.cdap.cdap.api.annotation.Name;
+import io.cdap.cdap.api.annotation.Plugin;
+import io.cdap.cdap.etl.api.FailureCollector;
+import io.cdap.cdap.etl.api.PipelineConfigurer;
+import io.cdap.cdap.etl.api.action.Action;
+import io.cdap.cdap.etl.api.action.ActionContext;
+import io.cdap.plugin.cloud.vision.CredentialsHelper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+import static io.cdap.plugin.cloud.vision.action.ActionConstants.MAX_NUMBER_OF_IMAGES_PER_BATCH;
+
+/**
+ * Action that runs offline document text extractor.
+ */
+@Plugin(type = Action.PLUGIN_TYPE)
+@Name(OfflineTextExtractorAction.PLUGIN_NAME)
+@Description("Action that runs offline document text extractor")
+public class OfflineTextExtractorAction extends Action {
+ public static final String PLUGIN_NAME = "OfflineTextExtractor";
+ private final OfflineTextExtractorActionConfig config;
+ private static final Logger LOG = LoggerFactory.getLogger(OfflineTextExtractorAction.class);
+
+ public OfflineTextExtractorAction(OfflineTextExtractorActionConfig config) {
+ if (config.getSourcePath() != null) {
+ config.setSourcePath(config.getSourcePath().trim()); // Remove whitespace
+ }
+ if (config.getDestinationPath() != null) {
+ config.setDestinationPath(config.getDestinationPath().trim());
+ }
+ this.config = config;
+ }
+
+ @Override
+ public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
+ FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector();
+ config.validate(collector);
+ collector.getOrThrowException();
+ }
+
+ @Override
+ public void run(ActionContext actionContext) throws Exception {
+ FailureCollector collector = actionContext.getFailureCollector();
+ config.validate(collector);
+ collector.getOrThrowException();
+
+ Credentials credentials = CredentialsHelper.getCredentials(config.getServiceAccountFilePath());
+
+ // Destination in GCS where the results will be stored
+ String destinationPath = config.getDestinationPath();
+ // Add a / at the end if it's not already there
+ if (!destinationPath.endsWith("/")) {
+ destinationPath += "/";
+ }
+ GcsDestination gcsDestination = GcsDestination.newBuilder()
+ .setUri(destinationPath)
+ .build();
+
+ OutputConfig outputConfig = OutputConfig.newBuilder()
+ .setBatchSize(config.getBatchSize())
+ .setGcsDestination(gcsDestination)
+ .build();
+
+ ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder()
+ .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
+ .build();
+
+ // Get all the blobs in the source path
+ List blobs = GcsBucketHelper.getAllFilesInPath(config.getSourcePath(), credentials);
+ if (blobs.isEmpty()) {
+ LOG.warn("Nothing found to process in path: " + config.getSourcePath());
+ return;
+ }
+
+ // Prepare the list of requests
+ List requests = new ArrayList<>(blobs.size());
+
+ // Feature we are going to ask for
+ Feature feature = Feature.newBuilder()
+ .setType(Feature.Type.DOCUMENT_TEXT_DETECTION)
+ .build();
+
+ try (ImageAnnotatorClient client = ImageAnnotatorClient.create(imageAnnotatorSettings)) {
+ // Create batches of images to send for processing
+ // We need to do this because there is a limit on the vision API that will raise an error if there are more
+ // than MAX_NUMBER_OF_IMAGES_PER_BATCH in a single batch
+ for (int batchId = 0;
+ batchId < (1 + blobs.size() / MAX_NUMBER_OF_IMAGES_PER_BATCH);
+ batchId++) {
+ for (int index = batchId * MAX_NUMBER_OF_IMAGES_PER_BATCH;
+ (index < (batchId + 1) * MAX_NUMBER_OF_IMAGES_PER_BATCH) && (index < blobs.size());
+ index++) {
+ Blob blob = blobs.get(index);
+ // Rebuild the full path of the blob
+ String fullBlobPath = "gs://" + blob.getBucket() + "/" + blob.getName();
+
+ GcsSource gcsSource = GcsSource.newBuilder()
+ .setUri(fullBlobPath)
+ .build();
+
+ InputConfig inputConfig = InputConfig.newBuilder()
+ .setMimeType(config.getMimeType())
+ .setGcsSource(gcsSource)
+ .build();
+
+ ImageContext context = ImageContext.newBuilder()
+ .addAllLanguageHints(config.getLanguageHintsList())
+ .build();
+
+ AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
+ .addFeatures(feature)
+ .setImageContext(context)
+ .setInputConfig(inputConfig)
+ .setOutputConfig(outputConfig)
+ .build();
+
+ // Add this request to the list
+ requests.add(request);
+ }
+
+ // Send the requests
+ AsyncBatchAnnotateFilesRequest asyncRequest = AsyncBatchAnnotateFilesRequest.newBuilder()
+ .addAllRequests(requests)
+ .build();
+
+ // Wait for the future to complete
+ client.asyncBatchAnnotateFilesAsync(asyncRequest)
+ .getInitialFuture()
+ .get();
+ }
+ } catch (Exception exception) {
+ throw new IllegalStateException(exception);
+ }
+ }
+}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfig.java
similarity index 79%
rename from src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfig.java
rename to src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfig.java
index cdebcef..16f97c2 100644
--- a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfig.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfig.java
@@ -23,49 +23,44 @@
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.plugin.cloud.vision.CloudVisionConstants;
-
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nullable;
/**
- * Config class for {@link TextExtractorAction}.
+ * Config class for {@link OfflineTextExtractorAction}.
*/
-public class TextExtractorActionConfig extends PluginConfig {
-
- @Name(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH)
- @Description("Path on the local file system of the service account key used "
- + "for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. "
- + "When running on other clusters, the file must be present on every node in the cluster.")
- @Macro
- private String serviceFilePath;
+public class OfflineTextExtractorActionConfig extends PluginConfig {
@Name(ActionConstants.SOURCE_PATH)
@Macro
@Description("Path to the location of the directory on GCS where the input files are stored.")
- private final String sourcePath;
-
+ private String sourcePath;
@Name(ActionConstants.DESTINATION_PATH)
@Macro
@Description("Path to the location of the directory on GCS where output files should be stored.")
- private final String destinationPath;
-
+ private String destinationPath;
@Name(CloudVisionConstants.MIME_TYPE)
@Description("Document type.")
private final String mimeType;
-
@Name(ActionConstants.BATCH_SIZE)
@Description("The max number of responses.")
private final Integer batchSize;
-
@Name(CloudVisionConstants.LANGUAGE_HINTS)
@Nullable
@Description("Optional hints to provide to Cloud Vision API.")
private final String languageHints;
+ @Name(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH)
+ @Description("Path on the local file system of the service account key used "
+ + "for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. "
+ + "When running on other clusters, the file must be present on every node in the cluster.")
+ @Macro
+ private String serviceFilePath;
- public TextExtractorActionConfig(String serviceFilePath, String sourcePath, String destinationPath, String mimeType,
- Integer batchSize, @Nullable String languageHints) {
+ public OfflineTextExtractorActionConfig(String serviceFilePath, String sourcePath,
+ String destinationPath, String mimeType,
+ Integer batchSize, @Nullable String languageHints) {
this.serviceFilePath = serviceFilePath;
this.sourcePath = sourcePath;
this.destinationPath = destinationPath;
@@ -74,7 +69,7 @@ public TextExtractorActionConfig(String serviceFilePath, String sourcePath, Stri
this.languageHints = languageHints;
}
- private TextExtractorActionConfig(Builder builder) {
+ private OfflineTextExtractorActionConfig(Builder builder) {
serviceFilePath = builder.serviceFilePath;
sourcePath = builder.sourcePath;
destinationPath = builder.destinationPath;
@@ -87,7 +82,13 @@ public static Builder builder() {
return new Builder();
}
- public static Builder builder(TextExtractorActionConfig copy) {
+ /**
+ * Helper function to get a Builder object based on an existing configuration.
+ *
+ * @param copy Configuration object to use as the source to copy from.
+ * @return Builer object.
+ */
+ public static Builder builder(OfflineTextExtractorActionConfig copy) {
Builder builder = new Builder();
builder.setServiceFilePath(copy.getServiceFilePath());
@@ -106,7 +107,8 @@ public String getServiceFilePath() {
@Nullable
public String getServiceAccountFilePath() {
- if (containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) || Strings.isNullOrEmpty(serviceFilePath)) {
+ if (containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH)
+ || Strings.isNullOrEmpty(serviceFilePath)) {
return null;
}
@@ -142,8 +144,22 @@ public List getLanguageHintsList() {
return Collections.emptyList();
}
+ public void setSourcePath(String sourcePath) {
+ this.sourcePath = sourcePath;
+ }
+
+ public void setDestinationPath(String destinationPath) {
+ this.destinationPath = destinationPath;
+ }
+
+ /**
+ * Validate that the configuration is correct. If not, use the FailureCollector object passed to report errors.
+ *
+ * @param collector FailureCollector object to use to report errors.
+ */
public void validate(FailureCollector collector) {
- if (!containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) && Strings.isNullOrEmpty(serviceFilePath)) {
+ if (!containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH)
+ && Strings.isNullOrEmpty(serviceFilePath)) {
collector.addFailure("Service account file path must be specified.", null)
.withConfigProperty(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH);
}
@@ -160,7 +176,7 @@ public void validate(FailureCollector collector) {
}
/**
- * Builder for creating a {@link TextExtractorActionConfig}.
+ * Builder for creating a {@link OfflineTextExtractorActionConfig}.
*/
public static final class Builder {
private String serviceFilePath;
@@ -205,8 +221,8 @@ public Builder setLanguageHints(@Nullable String languageHints) {
return this;
}
- public TextExtractorActionConfig build() {
- return new TextExtractorActionConfig(this);
+ public OfflineTextExtractorActionConfig build() {
+ return new OfflineTextExtractorActionConfig(this);
}
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java b/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java
deleted file mode 100644
index 79c93a9..0000000
--- a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright © 2019 Cask Data, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package io.cdap.plugin.cloud.vision.action;
-
-import com.google.api.gax.core.FixedCredentialsProvider;
-import com.google.auth.Credentials;
-import com.google.cloud.vision.v1.AsyncAnnotateFileRequest;
-import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesRequest;
-import com.google.cloud.vision.v1.Feature;
-import com.google.cloud.vision.v1.GcsDestination;
-import com.google.cloud.vision.v1.GcsSource;
-import com.google.cloud.vision.v1.ImageAnnotatorClient;
-import com.google.cloud.vision.v1.ImageAnnotatorSettings;
-import com.google.cloud.vision.v1.ImageContext;
-import com.google.cloud.vision.v1.InputConfig;
-import com.google.cloud.vision.v1.OutputConfig;
-import io.cdap.cdap.api.annotation.Description;
-import io.cdap.cdap.api.annotation.Name;
-import io.cdap.cdap.api.annotation.Plugin;
-import io.cdap.cdap.etl.api.FailureCollector;
-import io.cdap.cdap.etl.api.PipelineConfigurer;
-import io.cdap.cdap.etl.api.action.Action;
-import io.cdap.cdap.etl.api.action.ActionContext;
-import io.cdap.plugin.cloud.vision.CredentialsHelper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Action that runs offline document text extractor.
- */
-@Plugin(type = Action.PLUGIN_TYPE)
-@Name(TextExtractorAction.PLUGIN_NAME)
-@Description("Action that runs offline document text extractor")
-public class TextExtractorAction extends Action {
- public static final String PLUGIN_NAME = "TextExtractorOffline";
- private static final Logger LOG = LoggerFactory.getLogger(TextExtractorAction.class);
-
- private final TextExtractorActionConfig config;
-
- public TextExtractorAction(TextExtractorActionConfig config) {
- this.config = config;
- }
-
- @Override
- public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
- FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector();
- config.validate(collector);
- collector.getOrThrowException();
- }
-
- @Override
- public void run(ActionContext actionContext) throws Exception {
- FailureCollector collector = actionContext.getFailureCollector();
- config.validate(collector);
- collector.getOrThrowException();
-
- Credentials credentials = CredentialsHelper.getCredentials(config.getServiceAccountFilePath());
- ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder()
- .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
- .build();
-
- try (ImageAnnotatorClient client = ImageAnnotatorClient.create(imageAnnotatorSettings)) {
- List requests = new ArrayList<>();
-
- GcsSource gcsSource = GcsSource.newBuilder()
- .setUri(config.getSourcePath())
- .build();
-
- GcsDestination gcsDestination = GcsDestination.newBuilder()
- .setUri(config.getDestinationPath())
- .build();
-
- InputConfig inputConfig = InputConfig.newBuilder()
- .setMimeType(config.getMimeType())
- .setGcsSource(gcsSource)
- .build();
-
- OutputConfig outputConfig = OutputConfig.newBuilder()
- .setBatchSize(config.getBatchSize())
- .setGcsDestination(gcsDestination)
- .build();
-
- Feature feature = Feature.newBuilder()
- .setType(Feature.Type.DOCUMENT_TEXT_DETECTION)
- .build();
-
- ImageContext context = ImageContext.newBuilder()
- .addAllLanguageHints(config.getLanguageHintsList())
- .build();
-
- AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
- .addFeatures(feature)
- .setImageContext(context)
- .setInputConfig(inputConfig)
- .setOutputConfig(outputConfig)
- .build();
-
- requests.add(request);
- AsyncBatchAnnotateFilesRequest asyncRequest = AsyncBatchAnnotateFilesRequest.newBuilder()
- .addAllRequests(requests)
- .build();
-
- client.asyncBatchAnnotateFilesAsync(asyncRequest)
- .getInitialFuture()
- .get();
- }
- }
-}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/exception/CloudVisionExecutionException.java b/src/main/java/io/cdap/plugin/cloud/vision/exception/CloudVisionExecutionException.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/CloudVisionClient.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/CloudVisionClient.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConstants.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ImageFeature.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ImageFeature.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ProductCategory.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ProductCategory.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java
old mode 100644
new mode 100755
index 607e44a..d2fc144
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java
@@ -44,6 +44,14 @@ public DocumentAnnotatorClient(DocumentExtractorTransformConfig config) {
this.config = config;
}
+ /**
+ * Convenience method that wraps a call to extractDocumentFeature(InputConfig inputConfig) by building an
+ * {@link InputConfig} object from a Byte array containing the image bytes.
+ *
+ * @param content Byte array containing the image bytes to ask the cloud vision API to work on.
+ * @return {@link AnnotateFileResponse} with the information requested from the cloud vision API.
+ * @throws Exception if there was an error sent back by the cloud vision API.
+ */
public AnnotateFileResponse extractDocumentFeature(byte[] content) throws Exception {
InputConfig inputConfig = InputConfig.newBuilder()
.setContent(ByteString.copyFrom(content))
@@ -52,6 +60,14 @@ public AnnotateFileResponse extractDocumentFeature(byte[] content) throws Except
return extractDocumentFeature(inputConfig);
}
+ /**
+ * Convenience method that wraps a call to extractDocumentFeature(InputConfig inputConfig) by building an
+ * {@link InputConfig} object from a gcsPath pointing to a blob in GCS.
+ *
+ * @param gcsPath Path to the blob to ask the cloud vision API to work on.
+ * @return {@link AnnotateFileResponse} with the information requested from the cloud vision API.
+ * @throws Exception if there was an error sent back by the cloud vision API.
+ */
public AnnotateFileResponse extractDocumentFeature(String gcsPath) throws Exception {
InputConfig inputConfig = InputConfig.newBuilder()
.setGcsSource(GcsSource.newBuilder().setUri(gcsPath))
@@ -60,6 +76,11 @@ public AnnotateFileResponse extractDocumentFeature(String gcsPath) throws Except
return extractDocumentFeature(inputConfig);
}
+ /**
+ * @param inputConfig Object that contains the information to send to the Cloud vision API.
+ * @return {@link AnnotateFileResponse} with the information requested from the cloud vision API.
+ * @throws Exception if there was an error sent back by the cloud vision API.
+ */
public AnnotateFileResponse extractDocumentFeature(InputConfig inputConfig) throws Exception {
try (ImageAnnotatorClient client = createImageAnnotatorClient()) {
Feature.Type featureType = config.getImageFeature().getFeatureType();
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java
old mode 100644
new mode 100755
index b8346c1..ca6771e
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java
@@ -57,34 +57,26 @@ public class DocumentExtractorTransform extends Transform emitter) {
try {
+ // There are two ways the cloud vision API can be called.
+ // 1. By providing the path to a blob in GCS.
+ // 2. By providing the actual bytes of the image file.
if (!Strings.isNullOrEmpty(config.getPathField())) {
transformPath(input, emitter);
} else {
@@ -118,6 +113,13 @@ public void transform(StructuredRecord input, Emitter emitter)
}
}
+ /**
+ * Method that gets a response back from the cloud vision API by providing the path to an image blob in GCS.
+ *
+ * @param input {@link StructuredRecord} passed in by CDAP to work with. It contains the actual path to use.
+ * @param emitter {@link Emitter} object to use to send the response back to CDAP
+ * @throws Exception Raised if there was an error coming back from the cloud vision API.
+ */
private void transformPath(StructuredRecord input, Emitter emitter) throws Exception {
String documentPath = input.get(config.getPathField());
AnnotateFileResponse response = documentAnnotatorClient.extractDocumentFeature(documentPath);
@@ -125,6 +127,13 @@ private void transformPath(StructuredRecord input, Emitter emi
emitter.emit(transformed);
}
+ /**
+ * Method that gets a response back from the cloud vision API by providing the actual bytes of the image file.
+ *
+ * @param input {@link StructuredRecord} passed in by CDAP to work with. It contains the actual bytes to use.
+ * @param emitter {@link Emitter} object to use to send the response back to CDAP
+ * @throws Exception Raised if there was an error coming back from the cloud vision API.
+ */
private void transformBytes(StructuredRecord input, Emitter emitter) throws Exception {
Object content = input.get(config.getPathField());
byte[] contentBytes = content instanceof ByteBuffer ? Bytes.getBytes((ByteBuffer) content) : (byte[]) content;
@@ -133,14 +142,22 @@ private void transformBytes(StructuredRecord input, Emitter em
emitter.emit(transformed);
}
- public Schema getSchema() {
+ /**
+ * Get the output Schema to use by combining the input Schema from CDAP and add the fields needed to store the
+ * information coming back from the cloud vision API.
+ *
+ * @return {@link Schema}
+ */
+ public Schema getOutputSchema() {
List fields = new ArrayList<>();
- if (inputSchema.getFields() != null) {
+ // Add the input fields
+ if (inputSchema != null && inputSchema.getFields() != null) {
fields.addAll(inputSchema.getFields());
}
-
+ // Add the fields of the image feature schema
Schema pagesSchema = pagesSchema(config.getImageFeature().getSchema());
fields.add(Schema.Field.of(config.getOutputField(), pagesSchema));
+ // Build a schema combining all
return Schema.recordOf("record", fields);
}
@@ -148,7 +165,7 @@ public Schema getSchema() {
* File Annotation mapped to record with field "page" for page number and "feature" field for extracted image feature.
*
* @param imageFeatureSchema extracted image feature schema.
- * @return File Annotation page schema.
+ * @return File Annotation page {@link Schema}.
*/
private Schema pagesSchema(Schema imageFeatureSchema) {
return Schema.arrayOf(
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java
old mode 100644
new mode 100755
index 1cd879b..ed4e753
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java
@@ -78,6 +78,11 @@ public String getPages() {
return pages;
}
+ /**
+ * Convenience method that splits a comma separated string of values and turn those into a List.
+ *
+ * @return {@link List}
+ */
public List getPagesList() {
if (Strings.isNullOrEmpty(pages)) {
return Collections.emptyList();
@@ -117,14 +122,17 @@ public void validate(FailureCollector collector) {
*/
public void validateInputSchema(Schema inputSchema, FailureCollector collector) {
Schema.Field contentField = inputSchema.getField(getContentField());
- if (contentField != null) {
- collector.addFailure(String.format("Content field '%s' is expected to be 'bytes'", getContentField()), null)
- .withInputSchemaField(getContentField());
- }
Schema.Field pathField = inputSchema.getField(getPathField());
- if (pathField != null) {
+
+ if (pathField != null && pathField.getSchema().getType() != Schema.Type.STRING) {
collector.addFailure(String.format("Path field '%s' is expected to be a string", getPathField()), null)
.withInputSchemaField(getPathField());
+ return;
+ }
+
+ if (contentField != null && contentField.getSchema().getType() != Schema.Type.STRING) {
+ collector.addFailure(String.format("Content field '%s' is expected to be a string", getContentField()), null)
+ .withInputSchemaField(getContentField());
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConstants.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/transformer/FileAnnotationToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/transformer/FileAnnotationToRecordTransformer.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java
old mode 100644
new mode 100755
index f345b7b..87d0052
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java
@@ -43,12 +43,19 @@ public ImageAnnotatorClient(ImageExtractorTransformConfig config) {
this.config = config;
}
+ /**
+ * @param gcsPath {@link String} that contains the path to a blon in GCS to use with the Cloud vision API.
+ * @return {@link AnnotateImageResponse} with the information requested from the cloud vision API.
+ * @throws Exception if there was an error sent back by the cloud vision API.
+ */
public AnnotateImageResponse extractImageFeature(String gcsPath) throws Exception {
try (com.google.cloud.vision.v1.ImageAnnotatorClient client = createImageAnnotatorClient()) {
ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
Image img = Image.newBuilder().setSource(imgSource).build();
+
Feature.Type featureType = config.getImageFeature().getFeatureType();
Feature feature = Feature.newBuilder().setType(featureType).build();
+
AnnotateImageRequest.Builder request = AnnotateImageRequest.newBuilder().addFeatures(feature).setImage(img);
ImageContext imageContext = getImageContext();
if (imageContext != null) {
@@ -56,13 +63,13 @@ public AnnotateImageResponse extractImageFeature(String gcsPath) throws Exceptio
}
BatchAnnotateImagesResponse response = client.batchAnnotateImages(Collections.singletonList(request.build()));
+
AnnotateImageResponse annotateImageResponse = response.getResponses(SINGLE_RESPONSE_INDEX);
if (annotateImageResponse.hasError()) {
String errorMessage = String.format("Unable to extract '%s' feature of image '%s' due to: '%s'", featureType,
gcsPath, annotateImageResponse.getError().getMessage());
throw new CloudVisionExecutionException(errorMessage);
}
-
return annotateImageResponse;
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java
old mode 100644
new mode 100755
index 0c97575..14d35e6
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java
@@ -55,27 +55,21 @@ public class ImageExtractorTransform extends Transform emitter)
}
}
- public Schema getSchema() {
+ /**
+ * Get the output Schema to use by combining the input Schema from CDAP and add the fields needed to store the
+ * information coming back from the cloud vision API.
+ *
+ * @return {@link Schema}
+ */
+ protected Schema getOutputSchema(Schema inputSchema) {
List fields = new ArrayList<>();
- if (inputSchema.getFields() != null) {
+ // Add the input fields
+ if (inputSchema != null && inputSchema.getFields() != null) {
fields.addAll(inputSchema.getFields());
}
-
+ // Add the fields of the image feature schema
fields.add(Schema.Field.of(config.getOutputField(), config.getImageFeature().getSchema()));
+ // Build a schema combining all
return Schema.recordOf("record", fields);
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java
old mode 100644
new mode 100755
index fddb321..b344b36
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java
@@ -23,6 +23,7 @@
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Macro;
import io.cdap.cdap.api.annotation.Name;
+import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.plugin.PluginConfig;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.plugin.cloud.vision.transform.ExtractorTransformConfig;
@@ -133,4 +134,21 @@ public void validate(FailureCollector collector) {
}
}
}
+
+ /**
+ * Validates input schema and checks for type compatibility.
+ *
+ * @param inputSchema input schema.
+ * @param collector failure collector.
+ */
+ public void validateInputSchema(Schema inputSchema, FailureCollector collector) {
+ Schema.Field pathField = inputSchema.getField(getPathField());
+ if (pathField != null && !(pathField.getSchema().getType() == Schema.Type.STRING)) {
+ collector.addFailure(
+ String.format("Path field '%s' is expected to be a string", getPathField()),
+ null).withInputSchemaField(getPathField());
+ }
+ }
+
+
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ColorInfoSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ColorInfoSchema.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java
old mode 100644
new mode 100755
index 9aa356b..38334bb
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java
@@ -45,7 +45,7 @@ private CropHintAnnotationSchema() {
public static final Schema SCHEMA = Schema.recordOf(
"crop-hint-annotation-component-record",
- Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)),
+ Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.getSchema("cropHintAnnotation-position"))),
Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(IMPORTANCE_FRACTION_FIELD_NAME, Schema.of(Schema.Type.FLOAT)));
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationSchema.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java
old mode 100644
new mode 100755
index 053b9a8..ed657a5
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java
@@ -40,7 +40,9 @@ private EntityAnnotationWithPositionSchema() {
Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(TOPICALITY_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(LOCATIONS_FIELD_NAME, Schema.arrayOf(LocationInfo.SCHEMA)),
- Schema.Field.of(POSITION_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(VertexSchema.SCHEMA))),
+ Schema.Field.of(POSITION_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ VertexSchema.getSchema("entityAnnotation-position")))),
Schema.Field.of(PROPERTIES_FIELD_NAME, Schema.arrayOf(Property.SCHEMA))
);
+
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java
old mode 100644
new mode 100755
index 3965d42..9cb6024
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java
@@ -16,8 +16,11 @@
package io.cdap.plugin.cloud.vision.transform.schema;
+import com.google.cloud.vision.v1.FaceAnnotation;
import com.google.cloud.vision.v1.Likelihood;
import io.cdap.cdap.api.data.schema.Schema;
+import java.util.ArrayList;
+import java.util.List;
/**
* {@link com.google.cloud.vision.v1.FaceAnnotation} mapped to a record with following fields.
@@ -59,52 +62,54 @@ private FaceAnnotationSchema() {
/**
* Anger likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String ANGER_FIELD_NAME = "anger";
+ public static final String ANGER_FIELD_NAME = "angerLikelihood";
/**
* Joy likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String JOY_FIELD_NAME = "joy";
+ public static final String JOY_FIELD_NAME = "joyLikelihood";
/**
* Surprise likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String SURPRISE_FIELD_NAME = "surprise";
+ public static final String SURPRISE_FIELD_NAME = "surpriseLikelihood";
/**
* Blurred likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String BLURRED_FIELD_NAME = "blurred";
+ public static final String BLURRED_FIELD_NAME = "blurredLikelihood";
/**
* Under exposed likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String UNDER_EXPOSED_FIELD_NAME = "underExposed";
+ public static final String UNDER_EXPOSED_FIELD_NAME = "underExposedLikelihood";
/**
* Sorrow likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String SORROW_FIELD_NAME = "sorrow";
+ public static final String SORROW_FIELD_NAME = "sorrowLikelihood";
/**
* Headwear likelihood. Possible values are defined by {@link Likelihood}.
*/
- public static final String HEADWEAR_FIELD_NAME = "headwear";
+ public static final String HEADWEAR_FIELD_NAME = "headwearLikelihood";
/**
- * The bounding polygon around the face. The bounding box is computed to "frame" the face in accordance with human
- * expectations. It is based on the landmarker results. Note that one or more x and/or y coordinates may not be
- * generated if only a partial face appears in the image to be annotated.
+ * The bounding polygon around the face. The bounding box is computed to "frame" the face in
+ * accordance with human expectations. It is based on the landmarker results. Note that one or
+ * more x and/or y coordinates may not be generated if only a partial face appears in the image
+ * to be annotated.
*/
- public static final String POSITION_FIELD_NAME = "position";
+ public static final String BOUNDING_POLY_NAME = "boundingPoly";
/**
- * The bounding polygon which is tighter than the {@link FaceAnnotationSchema#POSITION_FIELD_NAME}, and encloses only
- * the skin part of the face. Typically, it is used to eliminate the face from any image analysis that detects the
- * "amount of skin" visible in an image. It is not based on the landmarker results, only on the initial face
- * detection, hence the fd
(face detection) prefix.
+ * The bounding polygon, tighter than the {@link FaceAnnotationSchema#BOUNDING_POLY_NAME},
+ * encloses only the skin part of the face. Typically, it is used to eliminate the face from any
+ * image analysis that detects the "amount of skin" visible in an image. It is not based on the
+ * landmarker results, only on the initial face detection, hence the fd
+ * (face detection) prefix.
*/
- public static final String FD_POSITION_FIELD_NAME = "fdPosition";
+ public static final String FD_BOUNDING_POLY_NAME = "fdBoundingPoly";
/**
* Detected face landmarks.
@@ -125,8 +130,8 @@ private FaceAnnotationSchema() {
Schema.Field.of(UNDER_EXPOSED_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(SORROW_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(HEADWEAR_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)),
- Schema.Field.of(FD_POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)),
+ Schema.Field.of(BOUNDING_POLY_NAME, Schema.arrayOf(VertexSchema.getSchema("bounding-vertex"))),
+ Schema.Field.of(FD_BOUNDING_POLY_NAME, Schema.arrayOf(VertexSchema.getSchema("fd-bounding-vertex"))),
Schema.Field.of(LANDMARKS_FIELD_NAME, Schema.arrayOf(FaceLandmark.SCHEMA))
);
@@ -135,6 +140,11 @@ private FaceAnnotationSchema() {
*/
public static class FaceLandmark {
+
+ private FaceLandmark() {
+ throw new AssertionError("Should not instantiate static utility class.");
+ }
+
/**
* Face landmark type.
*/
@@ -156,9 +166,26 @@ public static class FaceLandmark {
public static final String Z_FIELD_NAME = "z";
public static final Schema SCHEMA = Schema.recordOf("face-landmark-record",
- Schema.Field.of(TYPE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ // The following field is not a simple type as it is defined as an Enum
+ Schema.Field.of(TYPE_FIELD_NAME, Schema.enumWith(FaceAnnotationSchema.getIterableFromEnum(
+ FaceAnnotation.Landmark.Type.values()))),
Schema.Field.of(X_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(Y_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(Z_FIELD_NAME, Schema.of(Schema.Type.FLOAT)));
}
+
+ /**
+ * Helper function to build a {@link List} from an array.
+ *
+ * @param input Array of type T.
+ * @param Type of the input array.
+ * @return {@link List}
+ */
+ protected static List getIterableFromEnum(T[] input) {
+ ArrayList list = new ArrayList<>(input.length);
+ for (T value : input) {
+ list.add(value.toString());
+ }
+ return list;
+ }
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java
old mode 100644
new mode 100755
index 3a7aef2..8877e4c
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java
@@ -43,7 +43,7 @@ private FullTextAnnotationSchema() {
public static final String PAGES_FIELD_NAME = "pages";
public static final Schema SCHEMA = Schema.recordOf(
- "document-text-annotation-component-record",
+ "fullTextAnnotation",
Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(PAGES_FIELD_NAME, Schema.arrayOf(TextPage.SCHEMA)));
@@ -85,7 +85,7 @@ public static class TextSymbol {
* 2----3
* | |
* 1----0
- * and the vertice order will still be (0, 1, 2, 3).
+ * and the vertex order will still be (0, 1, 2, 3).
*/
public static final String BOUNDING_BOX_FIELD_NAME = "boundingBox";
@@ -93,9 +93,11 @@ public static class TextSymbol {
"document-text-page-symbol-record",
Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
- Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))),
+ Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ DetectedLanguage.getSchema("textSymbol-detectedLanguages")))),
Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))),
- Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)));
+ Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(
+ VertexSchema.getSchema("textSymbol-boundingBox"))));
}
/**
@@ -150,9 +152,11 @@ public static class TextWord {
Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(SYMBOLS_FIELD_NAME, Schema.arrayOf(TextSymbol.SCHEMA)),
- Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))),
+ Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ DetectedLanguage.getSchema("textWord-detectedLanguage")))),
Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))),
- Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)));
+ Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(
+ VertexSchema.getSchema("textWord-boundingBox"))));
}
/**
@@ -198,7 +202,7 @@ public static class TextParagraph {
* 2----3
* | |
* 1----0
- * and the vertice order will still be (0, 1, 2, 3).
+ * and the vertex order will still be (0, 1, 2, 3).
*/
public static final String BOUNDING_BOX_FIELD_NAME = "boundingBox";
@@ -207,9 +211,11 @@ public static class TextParagraph {
Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(WORDS_FIELD_NAME, Schema.arrayOf(TextWord.SCHEMA)),
- Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))),
+ Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ DetectedLanguage.getSchema("textParagraph-detectedLanguages")))),
Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))),
- Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)));
+ Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(
+ VertexSchema.getSchema("textParagraph-boundingBox"))));
}
/**
@@ -260,7 +266,7 @@ public static class TextBlock {
* 2----3
* | |
* 1----0
- * and the vertice order will still be (0, 1, 2, 3).
+ * and the vertex order will still be (0, 1, 2, 3).
*/
public static final String BOUNDING_BOX_FIELD_NAME = "boundingBox";
@@ -270,9 +276,11 @@ public static class TextBlock {
Schema.Field.of(BLOCK_TYPE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
Schema.Field.of(PARAGRAPHS_FIELD_NAME, Schema.arrayOf(TextParagraph.SCHEMA)),
- Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))),
+ Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ DetectedLanguage.getSchema("textBlock-detectedLanguages")))),
Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))),
- Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)));
+ Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(
+ VertexSchema.getSchema("textBlock-boundingBox"))));
}
/**
@@ -305,6 +313,11 @@ public static class TextPage {
*/
public static final String BLOCKS_FIELD_NAME = "blocks";
+ /**
+ * Property section that contains detected languages.
+ */
+ public static final String PROPERTY_FIELD_NAME = "property";
+
/**
* A list of detected languages together with confidence.
*/
@@ -318,11 +331,14 @@ public static class TextPage {
public static final Schema SCHEMA = Schema.recordOf(
"document-text-page-record",
Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(WIDTH_FIELD_NAME, Schema.of(Schema.Type.INT)),
- Schema.Field.of(HEIGHT_FIELD_NAME, Schema.of(Schema.Type.INT)),
+ Schema.Field.of(WIDTH_FIELD_NAME, Schema.of(Schema.Type.INT)), //
+ Schema.Field.of(HEIGHT_FIELD_NAME, Schema.of(Schema.Type.INT)), //
Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
- Schema.Field.of(BLOCKS_FIELD_NAME, Schema.arrayOf(TextBlock.SCHEMA)),
- Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))),
+ Schema.Field.of(BLOCKS_FIELD_NAME, Schema.arrayOf(TextBlock.SCHEMA)), //
+ Schema.Field.of(PROPERTY_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ DetectedLanguage.getSchema("textPage-property")))),
+ Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(
+ DetectedLanguage.getSchema("textPage-detectedLanguages")))),
Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))));
}
@@ -333,19 +349,28 @@ public static class TextPage {
public static class DetectedLanguage {
/**
- * The BCP-47 language code, such as "en-US" or "sr-Latn". For more information, see
+ * The BCP-47 language code, such as "en-US" or "sr-Latin". For more information, see
* http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
*/
- public static final String CODE_FIELD_NAME = "code";
+ public static final String LANGUAGE_CODE_FIELD_NAME = "languageCode";
/**
* Confidence of detected language. Range [0, 1].
*/
public static final String CONFIDENCE_FIELD_NAME = "confidence";
- public static final Schema SCHEMA = Schema.recordOf(
- "detected-language-record",
- Schema.Field.of(CODE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)));
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
+ public static Schema getSchema(String name) {
+ return Schema.recordOf(
+ name,
+ Schema.Field.of(LANGUAGE_CODE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)));
+ }
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java
old mode 100644
new mode 100755
index 26b7b7c..0dbab7d
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java
@@ -54,12 +54,19 @@ protected LocalizedObjectAnnotationSchema() {
*/
public static final String POSITION_FIELD_NAME = "position";
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
public static final Schema SCHEMA = Schema.recordOf(
"localized-object-annotation-component-record",
Schema.Field.of(MID_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(LANGUAGE_CODE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
- Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA))
+ Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.getSchema("localizedObjectAnnotation-position")))
);
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java
old mode 100644
new mode 100755
index 026024e..dd8446b
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java
@@ -46,9 +46,9 @@ private ProductSearchResultsSchema() {
*/
public static final String GROUPED_RESULTS_FIELD_NAME = "productGroupedResults";
- public static final Schema SCHEMA = Schema.recordOf("product-search-result-record",
+ public static final Schema SCHEMA = Schema.recordOf("productSearch-resultRecord",
Schema.Field.of(INDEX_TIME_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))),
- Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.SCHEMA)),
+ Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.getSchema("productSearch-result"))),
Schema.Field.of(GROUPED_RESULTS_FIELD_NAME, Schema.arrayOf(GroupedResult.SCHEMA))
);
@@ -72,11 +72,20 @@ public static class Result {
*/
public static final String PRODUCT_FIELD_NAME = "product";
- public static final Schema SCHEMA = Schema.recordOf("result-record",
- Schema.Field.of(IMAGE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
- Schema.Field.of(PRODUCT_FIELD_NAME, Product.SCHEMA)
- );
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
+ public static Schema getSchema(String name) {
+ return Schema.recordOf(name,
+ Schema.Field.of(IMAGE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
+ Schema.Field.of(PRODUCT_FIELD_NAME, Product.getSchema(name + "result-product"))
+ );
+ }
}
/**
@@ -111,13 +120,24 @@ public static class Product {
*/
public static final String PRODUCT_LABELS_FIELD_NAME = "productLabels";
- public static final Schema SCHEMA = Schema.recordOf("product-record",
- Schema.Field.of(NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(DISPLAY_NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(DESCRIPTION_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(PRODUCT_CATEGORY_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(PRODUCT_LABELS_FIELD_NAME, Schema.arrayOf(KeyValue.SCHEMA))
- );
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
+ public static Schema getSchema(String name) {
+ return Schema.recordOf(name,
+ Schema.Field.of(NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(DISPLAY_NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(DESCRIPTION_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(PRODUCT_CATEGORY_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(PRODUCT_LABELS_FIELD_NAME, Schema.arrayOf(
+ KeyValue.getSchema(name + "-keyValue")))
+ );
+ }
+
}
/**
@@ -135,10 +155,19 @@ public static class KeyValue {
*/
public static final String VALUE_FIELD_NAME = "value";
- public static final Schema SCHEMA = Schema.recordOf("key-value-record",
- Schema.Field.of(KEY_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(VALUE_FIELD_NAME, Schema.of(Schema.Type.STRING))
- );
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
+ public static Schema getSchema(String name) {
+ return Schema.recordOf(name,
+ Schema.Field.of(KEY_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(VALUE_FIELD_NAME, Schema.of(Schema.Type.STRING))
+ );
+ }
}
/**
@@ -156,9 +185,10 @@ public static class GroupedResult {
*/
public static final String RESULTS_FIELD_NAME = "results";
- public static final Schema SCHEMA = Schema.recordOf("grouped-result-record",
- Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)),
- Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.SCHEMA))
+ public static final Schema SCHEMA = Schema.recordOf("groupedResult-record",
+ Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(
+ VertexSchema.getSchema("groupedResult-position"))),
+ Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.getSchema("groupedResult-results")))
);
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/SafeSearchAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/SafeSearchAnnotationSchema.java
old mode 100644
new mode 100755
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java
old mode 100644
new mode 100755
index ac518dc..f5b15f9
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java
@@ -23,6 +23,7 @@
*/
public class TextAnnotationSchema {
+
private TextAnnotationSchema() {
throw new AssertionError("Should not instantiate static utility class.");
}
@@ -47,5 +48,6 @@ private TextAnnotationSchema() {
"text-annotation-component-record",
Schema.Field.of(LOCALE_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))),
Schema.Field.of(DESCRIPTION_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)));
+ Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.getSchema("textAnnotation-position")))
+ );
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java
old mode 100644
new mode 100755
index c441735..ef5ec78
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java
@@ -19,11 +19,14 @@
import io.cdap.cdap.api.data.schema.Schema;
/**
- * A vertex represents a 2D point in the image. {@link com.google.cloud.vision.v1.Vertex} mapped to a record with
+ * A vertex represents a 2D point in the image. {@link com.google.cloud.vision.v1.Vertex} mapped to a record with the
* following fields.
*/
public class VertexSchema {
+ /**
+ * Prevent instantiating this class.
+ */
private VertexSchema() {
throw new AssertionError("Should not instantiate static utility class.");
}
@@ -38,7 +41,16 @@ private VertexSchema() {
*/
public static final String Y_FIELD_NAME = "y";
- public static final Schema SCHEMA = Schema.recordOf("vertex-record",
- Schema.Field.of(X_FIELD_NAME, Schema.of(Schema.Type.INT)),
- Schema.Field.of(Y_FIELD_NAME, Schema.of(Schema.Type.INT)));
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
+ public static Schema getSchema(String name) {
+ return Schema.recordOf(name,
+ Schema.Field.of(X_FIELD_NAME, Schema.of(Schema.Type.INT)),
+ Schema.Field.of(Y_FIELD_NAME, Schema.of(Schema.Type.INT)));
+ }
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java
old mode 100644
new mode 100755
index d68b015..c3c8be5
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java
@@ -61,10 +61,10 @@ private WebDetectionSchema() {
public static final Schema SCHEMA = Schema.recordOf(
"web-detection-record",
Schema.Field.of(ENTITIES_FIELD_NAME, Schema.arrayOf(WebEntity.SCHEMA)),
- Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)),
- Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)),
+ Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("fmiWebImage"))),
+ Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("pmiWebImage"))),
Schema.Field.of(PAGES_WITH_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebPage.SCHEMA)),
- Schema.Field.of(VISUALLY_SIMILAR_IMAGES, Schema.arrayOf(WebImage.SCHEMA)),
+ Schema.Field.of(VISUALLY_SIMILAR_IMAGES, Schema.arrayOf(WebImage.getSchema("vsiWebImage"))),
Schema.Field.of(BEST_GUESS_LABELS_FIELD_NAME, Schema.arrayOf(BestGuessLabel.SCHEMA))
);
@@ -132,8 +132,8 @@ public static class WebPage {
Schema.Field.of(URL_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(PAGE_TITLE_FIELD_NAME, Schema.of(Schema.Type.STRING)),
Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)),
- Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)),
- Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA))
+ Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("webPage-fmiWebImage"))),
+ Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("webPage-pmiWebImage")))
);
}
@@ -148,7 +148,7 @@ public static class BestGuessLabel {
public static final String LABEL_FIELD_NAME = "label";
/**
- * The BCP-47 language code, such as "en-US" or "sr-Latn". For more information, see
+ * The BCP-47 language code, such as "en-US" or "sr-Latin". For more information, see
* http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
*/
public static final String LANGUAGE_CODE_FIELD_NAME = "languageCode";
@@ -175,10 +175,18 @@ public static class WebImage {
*/
public static final String SCORE_FIELD_NAME = "score";
- public static final Schema SCHEMA = Schema.recordOf(
- "web-image-record",
- Schema.Field.of(URL_FIELD_NAME, Schema.of(Schema.Type.STRING)),
- Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT))
- );
+ /**
+ * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas
+ * that will be combined into a larger {@link Schema}.
+ *
+ * @param name {@link String} containing the name to give to the returned {@link Schema}.
+ * @return a {@link Schema} with the given name.
+ */
+ public static Schema getSchema(String name) {
+ return Schema.recordOf(name,
+ Schema.Field.of(URL_FIELD_NAME, Schema.of(Schema.Type.STRING)),
+ Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT))
+ );
+ }
}
}
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index eb579ea..37d9d14
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java
@@ -35,8 +35,16 @@ public CropHintsAnnotationsToRecordTransformer(Schema schema, String outputField
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link CropHintAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
- public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
+ public StructuredRecord transform(StructuredRecord input,
+ AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
.set(outputFieldName, extractCropHintsAnnotations(annotateImageResponse))
.build();
@@ -48,9 +56,16 @@ private List extractCropHintsAnnotations(AnnotateImageResponse
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link StructuredRecord} from a {@link CropHint} passed in.
+ *
+ * @param hint Contains the {@link CropHint} information to use.
+ * @return {@link StructuredRecord} that contains the data mapped to the {@link Schema}.
+ */
private StructuredRecord extractCropHintRecord(CropHint hint) {
Schema hintSchema = getCropHintsAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(hintSchema);
+
Schema.Field positionField = hintSchema.getField(CropHintAnnotationSchema.POSITION_FIELD_NAME);
if (positionField != null) {
Schema positionSchema = getComponentSchema(positionField);
@@ -74,7 +89,7 @@ private StructuredRecord extractCropHintRecord(CropHint hint) {
* Retrieves Crop Hints Annotation's non-nullable component schema. Crop Hints Annotation's schema is retrieved
* instead of using constant schema since users are free to choose to not include some of the fields.
*
- * @return Crop Hints Annotation's non-nullable component schema.
+ * @return Crop Hints Annotation's non-nullable component {@link Schema}.
*/
private Schema getCropHintsAnnotationSchema() {
Schema.Field cropHintsAnnotationsField = schema.getField(outputFieldName);
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index 4373fbe..e1d4ad6
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java
@@ -35,19 +35,39 @@ public FaceAnnotationsToRecordTransformer(Schema schema, String outputFieldName)
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link FaceAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
- return getOutputRecordBuilder(input)
- .set(outputFieldName, extractFaceAnnotations(annotateImageResponse))
- .build();
+ StructuredRecord.Builder builder = getOutputRecordBuilder(input);
+ List extracted = extractFaceAnnotations(annotateImageResponse);
+ return builder.set(outputFieldName, extracted).build();
}
+ /**
+ * Extract a complete {@link List} of {@link StructuredRecord} objects mapped from the {@link AnnotateImageResponse}
+ * passed in, using the {@link FaceAnnotationSchema}.
+ *
+ * @param annotateImageResponse Input {@link AnnotateImageResponse} object to get the data from.
+ * @return {@link List} containing the {@link StructuredRecord} mapped from the input.
+ */
private List extractFaceAnnotations(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getFaceAnnotationsList().stream()
.map(this::extractFaceAnnotationRecord)
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link StructuredRecord} from a {@link FaceAnnotation} input.
+ *
+ * @param annotation The {@link FaceAnnotation} object to get the data from.
+ * @return {@link StructuredRecord} containing the data mapped to the {@link Schema}.
+ */
private StructuredRecord extractFaceAnnotationRecord(FaceAnnotation annotation) {
Schema faceSchema = getFaceAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(faceSchema);
@@ -90,21 +110,21 @@ private StructuredRecord extractFaceAnnotationRecord(FaceAnnotation annotation)
String surprise = annotation.getSurpriseLikelihood().name();
builder.set(FaceAnnotationSchema.SURPRISE_FIELD_NAME, surprise);
}
- Schema.Field positionField = faceSchema.getField(FaceAnnotationSchema.POSITION_FIELD_NAME);
+ Schema.Field positionField = faceSchema.getField(FaceAnnotationSchema.BOUNDING_POLY_NAME);
if (positionField != null) {
Schema positionSchema = getComponentSchema(positionField);
List position = annotation.getBoundingPoly().getVerticesList().stream()
.map(v -> extractVertex(v, positionSchema))
.collect(Collectors.toList());
- builder.set(FaceAnnotationSchema.POSITION_FIELD_NAME, position);
+ builder.set(FaceAnnotationSchema.BOUNDING_POLY_NAME, position);
}
- Schema.Field fdPositionField = faceSchema.getField(FaceAnnotationSchema.FD_POSITION_FIELD_NAME);
+ Schema.Field fdPositionField = faceSchema.getField(FaceAnnotationSchema.FD_BOUNDING_POLY_NAME);
if (fdPositionField != null) {
Schema positionSchema = getComponentSchema(fdPositionField);
List position = annotation.getFdBoundingPoly().getVerticesList().stream()
.map(v -> extractVertex(v, positionSchema))
.collect(Collectors.toList());
- builder.set(FaceAnnotationSchema.FD_POSITION_FIELD_NAME, position);
+ builder.set(FaceAnnotationSchema.FD_BOUNDING_POLY_NAME, position);
}
Schema.Field landmarksField = faceSchema.getField(FaceAnnotationSchema.LANDMARKS_FIELD_NAME);
if (landmarksField != null) {
@@ -118,6 +138,13 @@ private StructuredRecord extractFaceAnnotationRecord(FaceAnnotation annotation)
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} from a {@link FaceAnnotation} input.
+ *
+ * @param landmark The {@link FaceAnnotation.Landmark} object to get the data from.
+ * @param schema The {@link Schema} to use for the mapping.
+ * @return {@link StructuredRecord} containing the data mapped to the {@link Schema}.
+ */
private StructuredRecord extractLandmark(FaceAnnotation.Landmark landmark, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(FaceAnnotationSchema.FaceLandmark.TYPE_FIELD_NAME) != null) {
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index efd371b..8378c5e
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java
@@ -40,6 +40,13 @@ public FullTextAnnotationsToRecordTransformer(Schema schema, String outputFieldN
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link FullTextAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
TextAnnotation annotation = annotateImageResponse.getFullTextAnnotation();
@@ -48,6 +55,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the handwriting information from a {@link TextAnnotation} input
+ * using a {@link io.cdap.plugin.cloud.vision.transform.schema.TextAnnotationSchema}.
+ *
+ * @param annotation A {@link TextAnnotation} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractHandwritingAnnotation(TextAnnotation annotation) {
Schema hwSchema = getHandwritingAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(hwSchema);
@@ -67,6 +81,14 @@ private StructuredRecord extractHandwritingAnnotation(TextAnnotation annotation)
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the page information from a {@link Page} input
+ * using a {@link Schema} for the mapping.
+ *
+ * @param page The {@link Page} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractPage(Page page, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(FullTextAnnotationSchema.TextPage.TEXT_FIELD_NAME) != null) {
@@ -114,6 +136,14 @@ private StructuredRecord extractPage(Page page, Schema schema) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the block information from a {@link Block} input
+ * using a {@link Schema} for the mapping.
+ *
+ * @param block The {@link Block} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractBlock(Block block, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(FullTextAnnotationSchema.TextBlock.TEXT_FIELD_NAME) != null) {
@@ -164,6 +194,14 @@ private StructuredRecord extractBlock(Block block, Schema schema) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the paragraph information from a {@link Paragraph} input
+ * using a {@link Schema} for the mapping.
+ *
+ * @param paragraph The {@link Paragraph} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractParagraph(Paragraph paragraph, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(FullTextAnnotationSchema.TextParagraph.TEXT_FIELD_NAME) != null) {
@@ -209,6 +247,14 @@ private StructuredRecord extractParagraph(Paragraph paragraph, Schema schema) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the paragraph information from a {@link Word} input
+ * using a {@link Schema} for the mapping.
+ *
+ * @param word The {@link Word} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractWord(Word word, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(FullTextAnnotationSchema.TextWord.TEXT_FIELD_NAME) != null) {
@@ -252,6 +298,14 @@ private StructuredRecord extractWord(Word word, Schema schema) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the paragraph information from a {@link Symbol} input
+ * using a {@link Schema} for the mapping.
+ *
+ * @param symbol The {@link Symbol} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractSymbol(Symbol symbol, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(FullTextAnnotationSchema.TextSymbol.TEXT_FIELD_NAME) != null) {
@@ -284,10 +338,18 @@ private StructuredRecord extractSymbol(Symbol symbol, Schema schema) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the detected language information from a
+ * {@link TextAnnotation.DetectedLanguage} input using a {@link Schema} for the mapping.
+ *
+ * @param language The {@link Symbol} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractDetectedLanguage(TextAnnotation.DetectedLanguage language, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
- if (schema.getField(FullTextAnnotationSchema.DetectedLanguage.CODE_FIELD_NAME) != null) {
- builder.set(FullTextAnnotationSchema.DetectedLanguage.CODE_FIELD_NAME, language.getLanguageCode());
+ if (schema.getField(FullTextAnnotationSchema.DetectedLanguage.LANGUAGE_CODE_FIELD_NAME) != null) {
+ builder.set(FullTextAnnotationSchema.DetectedLanguage.LANGUAGE_CODE_FIELD_NAME, language.getLanguageCode());
}
if (schema.getField(FullTextAnnotationSchema.DetectedLanguage.CONFIDENCE_FIELD_NAME) != null) {
builder.set(FullTextAnnotationSchema.DetectedLanguage.CONFIDENCE_FIELD_NAME, language.getConfidence());
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java
old mode 100644
new mode 100755
index c1fb7a8..f494049
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java
@@ -71,18 +71,29 @@ public ImageAnnotationToRecordTransformer(Schema schema, String outputFieldName)
*/
protected StructuredRecord.Builder getOutputRecordBuilder(StructuredRecord input) {
Schema inputRecordSchema = input.getSchema();
+ // schema is the output schema
StructuredRecord.Builder outputRecordBuilder = StructuredRecord.builder(schema);
- for (Schema.Field field : schema.getFields()) {
- if (inputRecordSchema.getField(field.getName()) == null) {
- continue;
+ // Loop through the input fields and copy them to the output
+ for (Schema.Field inputField : inputRecordSchema.getFields()) {
+ if (schema.getField(inputField.getName()) == null) {
+ continue; // Not found at the output
}
+
// copy input record field values
- outputRecordBuilder.set(field.getName(), input.get(field.getName()));
+ outputRecordBuilder.set(inputField.getName(), input.get(inputField.getName()));
}
return outputRecordBuilder;
}
+ /**
+ * Extract a {@link StructuredRecord} containing the vertex information from a
+ * {@link Vertex} input using a {@link Schema} for the mapping.
+ *
+ * @param vertex The {@link Vertex} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping of the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
protected StructuredRecord extractVertex(Vertex vertex, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(VertexSchema.X_FIELD_NAME) != null) {
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index 4a4b74f..9cee518
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java
@@ -35,6 +35,13 @@ public ImagePropertiesAnnotationsToRecordTransformer(Schema schema, String outpu
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link ColorInfoSchema}. This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
@@ -42,12 +49,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the dominant colors information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private List extractDominantColors(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getImagePropertiesAnnotation().getDominantColors().getColorsList().stream()
.map(this::extractColorInfoRecord)
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link StructuredRecord} containing the color information from a
+ * {@link ColorInfo} input using a {@link Schema} for the mapping.
+ *
+ * @param colorInfo The {@link ColorInfo} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractColorInfoRecord(ColorInfo colorInfo) {
Schema faceSchema = getColorInfoSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(faceSchema);
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index 2d32da3..94ced78
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java
@@ -37,6 +37,14 @@ public LabelAnnotationsToRecordTransformer(Schema schema, String outputFieldName
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}.
+ * This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
@@ -44,12 +52,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link List} of {@link StructuredRecord} containing the label information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private List extractLabelAnnotations(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getLabelAnnotationsList().stream()
.map(this::extractAnnotation)
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link List} of {@link StructuredRecord} containing the entity annotation information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotation The {@link EntityAnnotation} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
protected StructuredRecord extractAnnotation(EntityAnnotation annotation) {
Schema labelSchema = getEntityAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(labelSchema);
@@ -88,6 +110,14 @@ protected StructuredRecord extractAnnotation(EntityAnnotation annotation) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the location information from a
+ * {@link LocationInfo} input using a {@link Schema} for the mapping.
+ *
+ * @param locationInfo The {@link LocationInfo} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
protected StructuredRecord extractLocation(LocationInfo locationInfo, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(EntityAnnotationSchema.LocationInfo.LATITUDE_FIELD_NAME) != null) {
@@ -102,6 +132,14 @@ protected StructuredRecord extractLocation(LocationInfo locationInfo, Schema sch
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the property information from a
+ * {@link Property} input using a {@link Schema} for the mapping.
+ *
+ * @param property The {@link Property} object containing the data.
+ * @param schema The {@link Schema} to use for the mapping.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
protected StructuredRecord extractProperty(Property property, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(EntityAnnotationSchema.Property.NAME_FIELD_NAME) != null) {
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index 76f08e4..a3c390a
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java
@@ -36,18 +36,40 @@ public LandmarkAnnotationsToRecordTransformer(Schema schema, String outputFieldN
}
@Override
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}.
+ * This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
.set(outputFieldName, extractLandmarkAnnotations(annotateImageResponse))
.build();
}
+ /**
+ * Extract a {@link List} of {@link StructuredRecord} containing the landmark information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private List extractLandmarkAnnotations(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getLandmarkAnnotationsList().stream()
.map(this::extractAnnotation)
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link StructuredRecord} containing the entity information from a
+ * {@link EntityAnnotation} input using a {@link Schema} for the mapping.
+ *
+ * @param annotation The {@link EntityAnnotation} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
@Override
protected StructuredRecord extractAnnotation(EntityAnnotation annotation) {
Schema landmarkSchema = getEntityAnnotationSchema();
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index 610a303..faf7d25
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java
@@ -35,6 +35,14 @@ public LocalizedObjectAnnotationsToRecordTransformer(Schema schema, String outpu
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}.
+ * This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
@@ -42,12 +50,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link List} of {@link StructuredRecord} containing the localized object information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private List extractLocalizedObjectAnnotations(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getLocalizedObjectAnnotationsList().stream()
.map(this::extractLocalizedObjectAnnotationRecord)
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link StructuredRecord} containing the localized object information from a
+ * {@link LocalizedObjectAnnotation} input using a {@link Schema} for the mapping.
+ *
+ * @param annotation The {@link LocalizedObjectAnnotation} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractLocalizedObjectAnnotationRecord(LocalizedObjectAnnotation annotation) {
Schema objSchema = getLocalizedObjectAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(objSchema);
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index 9888050..b9a4c26
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java
@@ -19,7 +19,6 @@
import com.google.cloud.vision.v1.AnnotateImageResponse;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
-
import java.util.List;
import java.util.stream.Collectors;
@@ -34,6 +33,14 @@ public LogoAnnotationsToRecordTransformer(Schema schema, String outputFieldName)
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}.
+ * This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
@@ -41,6 +48,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link List} of {@link StructuredRecord} containing the logo information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private List extractLogoAnnotations(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getLogoAnnotationsList().stream()
.map(this::extractAnnotation)
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java
old mode 100644
new mode 100755
index 69f335d..f8e69e0
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java
@@ -37,6 +37,14 @@ public ProductSearchResultToRecordTransformer(Schema schema, String outputFieldN
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link ProductSearchResultsSchema}. This {@link StructuredRecord} can then be turned into
+ * a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
ProductSearchResults productSearchResults = annotateImageResponse.getProductSearchResults();
@@ -45,6 +53,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the product search information from a
+ * {@link ProductSearchResults} input using a {@link Schema} for the mapping.
+ *
+ * @param searchResults The {@link ProductSearchResults} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractProductSearchResults(ProductSearchResults searchResults) {
Schema schema = getProductSearchResultSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
@@ -74,6 +89,13 @@ private StructuredRecord extractProductSearchResults(ProductSearchResults search
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the search result information from a
+ * {@link ProductSearchResults.Result} input using a {@link Schema} for the mapping.
+ *
+ * @param result The {@link ProductSearchResults.Result} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractProductSearchResultRecord(ProductSearchResults.Result result, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(ProductSearchResultsSchema.Result.IMAGE_FIELD_NAME) != null) {
@@ -93,6 +115,14 @@ private StructuredRecord extractProductSearchResultRecord(ProductSearchResults.R
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the product information from a
+ * {@link Product} input using a {@link Schema} for the mapping.
+ *
+ * @param product The {@link Product} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractProductRecord(Product product, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(ProductSearchResultsSchema.Product.NAME_FIELD_NAME) != null) {
@@ -119,6 +149,14 @@ private StructuredRecord extractProductRecord(Product product, Schema schema) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the label information from a
+ * {@link Product.KeyValue} input using a {@link Schema} for the mapping.
+ *
+ * @param label The {@link Product.KeyValue} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractProductLabelRecord(Product.KeyValue label, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(ProductSearchResultsSchema.KeyValue.KEY_FIELD_NAME) != null) {
@@ -130,6 +168,14 @@ private StructuredRecord extractProductLabelRecord(Product.KeyValue label, Schem
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the grouped result information from a
+ * {@link Product.KeyValue} input using a {@link Schema} for the mapping.
+ *
+ * @param result The {@link ProductSearchResults.GroupedResult} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractProductSearchResultRecord(ProductSearchResults.GroupedResult result, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
Schema.Field positionField = schema.getField(ProductSearchResultsSchema.GroupedResult.POSITION_FIELD_NAME);
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index be1b1d4..469bb25
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java
@@ -33,6 +33,14 @@ public SafeSearchAnnotationsToRecordTransformer(Schema schema, String outputFiel
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link SafeSearchAnnotationSchema}. This {@link StructuredRecord} can then be turned into
+ * a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
SafeSearchAnnotation annotation = annotateImageResponse.getSafeSearchAnnotation();
@@ -41,6 +49,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the safe search information from a
+ * {@link SafeSearchAnnotation} input using a {@link Schema} for the mapping.
+ *
+ * @param annotation The {@link SafeSearchAnnotation} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractSafeSearchAnnotation(SafeSearchAnnotation annotation) {
Schema safeSearchAnnotationSchema = getSafeSearchAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(safeSearchAnnotationSchema);
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java
old mode 100644
new mode 100755
index fbefeaa..219212b
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java
@@ -35,6 +35,13 @@ public TextAnnotationsToRecordTransformer(Schema schema, String outputFieldName)
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link TextAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
return getOutputRecordBuilder(input)
@@ -42,12 +49,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link List} of {@link StructuredRecord} containing the text information from a
+ * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping.
+ *
+ * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private List extractTextAnnotations(AnnotateImageResponse annotateImageResponse) {
return annotateImageResponse.getTextAnnotationsList().stream()
.map(this::extractTextAnnotationRecord)
.collect(Collectors.toList());
}
+ /**
+ * Extract a {@link StructuredRecord} containing the text annotation information from a
+ * {@link EntityAnnotation} input using a {@link Schema} for the mapping.
+ *
+ * @param annotation The {@link EntityAnnotation} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractTextAnnotationRecord(EntityAnnotation annotation) {
Schema textSchema = getTextAnnotationSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(textSchema);
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java
old mode 100644
new mode 100755
index 52cd1b9..0cc0a59
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java
@@ -19,8 +19,8 @@
import io.cdap.plugin.cloud.vision.transform.ImageFeature;
/**
- * A factory which creates instance of {@ImageAnnotationToRecordTransformer} in accordance to feature and output schema
- * configured in input config.
+ * A factory which creates instances of {@ImageAnnotationToRecordTransformer} in accordance to the feature and output
+ * schema configured in input config.
*/
public class TransformerFactory {
public static ImageAnnotationToRecordTransformer createInstance(ImageFeature imageFeature, String outputFieldName,
diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java
old mode 100644
new mode 100755
index 82b41f1..d4fb683
--- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java
+++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java
@@ -24,7 +24,6 @@
import java.util.List;
import java.util.stream.Collectors;
-
/**
* Transforms web detection of specified {@link AnnotateImageResponse} to {@link StructuredRecord} according
* to the specified schema.
@@ -35,6 +34,13 @@ public WebDetectionToRecordTransformer(Schema schema, String outputFieldName) {
super(schema, outputFieldName);
}
+ /**
+ * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord}
+ * using the {@link WebDetectionSchema}. This {@link StructuredRecord} can then be turned into a json document.
+ *
+ * @param input {@link StructuredRecord} to add to.
+ * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from.
+ */
@Override
public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) {
WebDetection webDetection = annotateImageResponse.getWebDetection();
@@ -43,6 +49,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse
.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the Web detection information from a
+ * {@link WebDetection} input using a {@link Schema} for the mapping.
+ *
+ * @param webDetection The {@link WebDetection} object containing the data.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractWebDetection(WebDetection webDetection) {
Schema webSchema = getWebDetectionSchema();
StructuredRecord.Builder builder = StructuredRecord.builder(webSchema);
@@ -103,6 +116,14 @@ private StructuredRecord extractWebDetection(WebDetection webDetection) {
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the Web entity information from a
+ * {@link WebDetection.WebEntity} input using a {@link Schema} for the mapping.
+ *
+ * @param webEntity The {@link WebDetection.WebEntity} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractEntity(WebDetection.WebEntity webEntity, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(WebDetectionSchema.WebEntity.ENTITY_ID_FIELD_NAME) != null) {
@@ -118,6 +139,14 @@ private StructuredRecord extractEntity(WebDetection.WebEntity webEntity, Schema
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the Web image information from a
+ * {@link WebDetection.WebImage} input using a {@link Schema} for the mapping.
+ *
+ * @param webImage The {@link WebDetection.WebImage} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractWebImage(WebDetection.WebImage webImage, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(WebDetectionSchema.WebImage.URL_FIELD_NAME) != null) {
@@ -130,6 +159,14 @@ private StructuredRecord extractWebImage(WebDetection.WebImage webImage, Schema
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the Web label information from a
+ * {@link WebDetection.WebLabel} input using a {@link Schema} for the mapping.
+ *
+ * @param webLabel The {@link WebDetection.WebLabel} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractWebLabel(WebDetection.WebLabel webLabel, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(WebDetectionSchema.BestGuessLabel.LABEL_FIELD_NAME) != null) {
@@ -142,6 +179,14 @@ private StructuredRecord extractWebLabel(WebDetection.WebLabel webLabel, Schema
return builder.build();
}
+ /**
+ * Extract a {@link StructuredRecord} containing the Web page information from a
+ * {@link WebDetection.WebPage} input using a {@link Schema} for the mapping.
+ *
+ * @param webPage The {@link WebDetection.WebPage} object containing the data.
+ * @param schema The {@link Schema} to use.
+ * @return A {@link StructuredRecord} containing the data mapped.
+ */
private StructuredRecord extractWebPage(WebDetection.WebPage webPage, Schema schema) {
StructuredRecord.Builder builder = StructuredRecord.builder(schema);
if (schema.getField(WebDetectionSchema.WebPage.URL_FIELD_NAME) != null) {
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/CloudVisionConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/CloudVisionConfigBuilder.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java b/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java
new file mode 100644
index 0000000..07b7a68
--- /dev/null
+++ b/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.cloud.vision;
+
+import com.google.auth.Credentials;
+import com.google.cloud.storage.Blob;
+import io.cdap.plugin.cloud.vision.action.GcsBucketHelper;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import java.util.List;
+
+/**
+ * Test that we can access blobs using a GCSPath and that the batching computation doesn't trigger
+ * an exception.
+ */
+public class GcsBucketHelperTest {
+ protected static final String PATH = System.getProperty("path", "gs://vision-api-pbo2/images");
+ protected static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath",
+ "auto-detect");
+ // This is a limit imposed by the Cloud Vision API
+ protected static final int MAX_NUMBER_OF_IMAGES_PER_BATCH = 2000;
+
+ @Test
+ public void getAllFilesInPath() throws Exception {
+ Credentials credentials = CredentialsHelper.getCredentials(SERVICE_ACCOUNT_FILE_PATH);
+
+ List blobs = GcsBucketHelper.getAllFilesInPath(PATH, credentials);
+ if (blobs.isEmpty()) {
+ throw new Exception("Cannot get blobs in: " + PATH);
+ }
+
+ System.out.println();
+ System.out.println("Using service acount path: " + SERVICE_ACCOUNT_FILE_PATH);
+ System.out.println("Blobs found in: " + PATH);
+ for (Blob blob : blobs) {
+ System.out.println("blob.getName(): " + blob.getName());
+ }
+
+ int countBlobs = 0;
+ for (int batchId = 0; batchId < (1 + blobs.size() / MAX_NUMBER_OF_IMAGES_PER_BATCH); batchId++) {
+ for (int index = batchId * MAX_NUMBER_OF_IMAGES_PER_BATCH;
+ (index < (batchId + 1) * MAX_NUMBER_OF_IMAGES_PER_BATCH) && (index < blobs.size());
+ index++) {
+ countBlobs++;
+ }
+ }
+
+ // Make sure we have taken into the loop as many blobs as returned from GCS
+ Assert.assertEquals(countBlobs, blobs.size());
+ }
+}
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/ValidationAssertions.java b/src/test/java/io/cdap/plugin/cloud/vision/ValidationAssertions.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java
old mode 100644
new mode 100755
index ad13ad8..1f87199
--- a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java
@@ -23,7 +23,6 @@
import io.cdap.plugin.cloud.vision.transform.ImageFeature;
import org.junit.Assert;
import org.junit.Test;
-
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -122,6 +121,30 @@ public void testValidateAspectRatios() {
assertValidationFailed(collector, paramNames);
}
+ @Test
+ public void testValidateOneLanguageHint() {
+ // Those are all the language ids retrieved from 'widgets/DocumentExtractor-transform.json'
+ String[] languages = new String[]{"af", "sq", "ar", "hy", "be", "bn", "bg", "ca", "zh", "hr", "cs", "da", "nl",
+ "en", "et", "fil", "fi", "fr", "de", "el", "gu", "iw", "hu", "is", "id", "it", "ja", "kn", "km", "ko", "lo",
+ "lv", "lt", "mk", "ms", "ml", "mr", "ne", "no", "fa", "pl", "pt", "pa", "ro", "ru", "ru-PETR1708", "sr",
+ "sr-Latn", "sk", "sl", "es", "sv", "ta", "te", "th", "tr", "uk", "vi", "yi"};
+
+ MockFailureCollector collector = new MockFailureCollector(MOCK_STAGE);
+ List> paramNames = Collections.singletonList(
+ Collections.singletonList(ActionConstants.LANGUAGE_HINTS)
+ );
+
+ // Validate the languages one by one
+ for (String language : languages) {
+ OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(VALID_CONFIG)
+ .setFeatures(ImageFeature.TEXT.getDisplayName())
+ .setLanguageHints(language)
+ .build();
+ config.validate(collector);
+ }
+ Assert.assertTrue(collector.getValidationFailures().isEmpty());
+ }
+
private void assertValidationFailed(MockFailureCollector failureCollector, List> paramNames) {
List failureList = failureCollector.getValidationFailures();
Assert.assertEquals(paramNames.size(), failureList.size());
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java
old mode 100644
new mode 100755
index cb6eafd..49e110e
--- a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java
@@ -29,8 +29,8 @@
import io.cdap.cdap.etl.mock.action.MockActionContext;
import io.cdap.plugin.cloud.vision.CloudVisionConstants;
import io.cdap.plugin.cloud.vision.CredentialsHelper;
-import io.cdap.plugin.cloud.vision.source.GCSPath;
import io.cdap.plugin.cloud.vision.transform.ImageFeature;
+import io.cdap.plugin.gcp.gcs.GCSPath;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -38,6 +38,7 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
+import java.util.ArrayList;
import javax.annotation.Nullable;
/**
@@ -46,27 +47,51 @@
public class OfflineImageExtractorActionTest {
private static final String PROJECT = System.getProperty("project", CloudVisionConstants.AUTO_DETECT);
- private static final String SERVICE_ACCOUNT_FILE_PATH =
- System.getProperty("serviceFilePath", CloudVisionConstants.AUTO_DETECT);
- private static final String PATH = System.getProperty("path",
- "gs://cloud-vision-cdap-text-image-extractor-offline");
+
+ private static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath",
+ CloudVisionConstants.AUTO_DETECT);
+ private static final String PATH = System.getProperty("path", "gs://cloud-vision-cdap-tests");
+ private static final String IMAGE_PATH_IN_BUCKET = "images";
+
private static final String PATH_PATTERN = "%s/%s/";
private static final String RESULT_PATH_PATTERN = "%s/%s";
private static final String JPG_CONTENT_TYPE = "image/jpeg";
- private static final String OBJECT_IMAGE_NAME = "multiple_objects.jpg";
- private static final String SAFE_IMAGE_NAME = "safe_search.jpg";
- private static final String CROP_HINTS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/crop_hints/bubble.jpeg";
- private static final String FACE_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/face/faces.jpeg";
- private static final String PROPERTIES_IMAGE_FILE_PATH =
- "gs://cloud-samples-data/vision/image_properties/bali.jpeg";
- private static final String LABELS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/label/setagaya.jpeg";
- private static final String LANDMARKS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/landmark/st_basils.jpeg";
- private static final String LOGOS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/logo/google_logo.jpg";
- private static final String OBJECTS_IMAGE_FILE_PATH = String.format(RESULT_PATH_PATTERN, PATH, OBJECT_IMAGE_NAME);
- private static final String SAFE_SEARCH_IMAGE_FILE_PATH = String.format(RESULT_PATH_PATTERN, PATH, SAFE_IMAGE_NAME);
- private static final String TEXT_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/ocr/sign.jpg";
- private static final String WEB_DETECTION_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/web/carnaval.jpeg";
+
+ private static final String LOCAL_IMAGE_FOLDER_PATH = "examples/images/jpg";
+
+ private static final String CROP_HINTS_IMAGE_NAME = "bubble.jpg";
+ private static final String FACE_IMAGE_NAME = "faces.jpg";
+ private static final String LABELS_IMAGE_NAME = "setagaya.jpg";
+ private static final String LANDMARKS_IMAGE_NAME = "st_basils.jpg";
+ private static final String LOGOS_IMAGE_NAME = "google_logo.jpg";
+ private static final String OBJECT_IMAGE_NAME = "fallon-michael-8LKQfBumjMo.jpg";
+ private static final String PROPERTIES_IMAGE_NAME = "bali.jpg";
+ private static final String SAFE_SEARCH_IMAGE_NAME = "keyur-nandaniya-oEgiJNbYw8w.jpg";
+ private static final String TEXT_IMAGE_NAME = "sign.jpg";
+ private static final String WEB_DETECTION_IMAGE_NAME = "carnaval.jpg";
+
+ private static final String CROP_HINTS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + CROP_HINTS_IMAGE_NAME;
+ private static final String FACE_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + FACE_IMAGE_NAME;
+ private static final String LABELS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + LABELS_IMAGE_NAME;
+ private static final String LANDMARKS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + LANDMARKS_IMAGE_NAME;
+ private static final String LOGOS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + LOGOS_IMAGE_NAME;
+ private static final String OBJECTS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + OBJECT_IMAGE_NAME;
+ private static final String PROPERTIES_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + PROPERTIES_IMAGE_NAME;
+ private static final String SAFE_SEARCH_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + SAFE_SEARCH_IMAGE_NAME;
+ private static final String TEXT_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + "/"
+ + TEXT_IMAGE_NAME;
+ private static final String WEB_DETECTION_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET
+ + "/" + WEB_DETECTION_IMAGE_NAME;
+
private static final String RESULT_FILE_NAME = "output-1-to-1.json";
private static final String BATCH_SIZE = "20";
@@ -85,6 +110,42 @@ public class OfflineImageExtractorActionTest {
private static Storage storage;
private static Bucket bucket;
+ private static final String FOLDER_CONTENT_TYPE = "application/x-www-form-urlencoded;charset=UTF-8";
+
+ private static void createFolder(String folderName) throws FileNotFoundException {
+ System.out.println("Creating folder: " + folderName);
+ FileInputStream fileInputStream = new FileInputStream("examples/empty_file.txt");
+ bucket.create(folderName + "/",
+ fileInputStream,
+ FOLDER_CONTENT_TYPE,
+ Bucket.BlobWriteOption.doesNotExist());
+ }
+
+ private static void uploadImages() throws FileNotFoundException {
+ ArrayList allImages = new ArrayList<>(10);
+ allImages.add(CROP_HINTS_IMAGE_NAME);
+ allImages.add(FACE_IMAGE_NAME);
+ allImages.add(LABELS_IMAGE_NAME);
+ allImages.add(LANDMARKS_IMAGE_NAME);
+ allImages.add(LOGOS_IMAGE_NAME);
+ allImages.add(OBJECT_IMAGE_NAME);
+ allImages.add(PROPERTIES_IMAGE_NAME);
+ allImages.add(SAFE_SEARCH_IMAGE_NAME);
+ allImages.add(TEXT_IMAGE_NAME);
+ allImages.add(WEB_DETECTION_IMAGE_NAME);
+
+ for (String imageName : allImages) {
+ String localImagePath = LOCAL_IMAGE_FOLDER_PATH + "/" + imageName;
+
+ FileInputStream fileInputStream = new FileInputStream(localImagePath);
+ System.out.println("Uploading " + localImagePath + " to " + bucket.getName());
+ bucket.create(IMAGE_PATH_IN_BUCKET + "/" + imageName,
+ fileInputStream,
+ JPG_CONTENT_TYPE,
+ Bucket.BlobWriteOption.doesNotExist());
+ }
+ }
+
@Before
public void testSetup() throws Exception {
Credentials credentials = CredentialsHelper.getCredentials(SERVICE_ACCOUNT_FILE_PATH);
@@ -98,12 +159,13 @@ public void testSetup() throws Exception {
deleteBucket(storage, bucket);
}
+ System.out.println("Creating bucket: " + bucketName);
BucketInfo bucketInfo = BucketInfo.newBuilder(bucketName)
.setStorageClass(StorageClass.STANDARD)
- .setLocation("us-central1")
+ .setLocation("us-east1")
.build();
bucket = storage.create(bucketInfo);
- initTestsData();
+ uploadImages();
}
@After
@@ -111,22 +173,14 @@ public void destroy() {
deleteBucket(storage, bucket);
}
- private static void initTestsData() throws FileNotFoundException {
- String path = String.format("src/test/resources/%s", OBJECT_IMAGE_NAME);
-
- FileInputStream serviceAccountStream = new FileInputStream(path);
- bucket.create(OBJECT_IMAGE_NAME, serviceAccountStream, JPG_CONTENT_TYPE, Bucket.BlobWriteOption.doesNotExist());
-
- path = String.format("src/test/resources/%s", SAFE_IMAGE_NAME);
-
- serviceAccountStream = new FileInputStream(path);
- bucket.create(SAFE_IMAGE_NAME, serviceAccountStream, JPG_CONTENT_TYPE, Bucket.BlobWriteOption.doesNotExist());
- }
-
@Test
public void testRunCropHintsFeature() throws Exception {
String folder = "crop_hints";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunCropHintsFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(CROP_HINTS_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -145,6 +199,10 @@ public void testRunCropHintsFeature() throws Exception {
public void testRunFaceFeature() throws Exception {
String folder = "face";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunFaceFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(FACE_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -161,6 +219,10 @@ public void testRunFaceFeature() throws Exception {
public void testRunImagePropertiesFeature() throws Exception {
String folder = "properties";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunImagePropertiesFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(PROPERTIES_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -178,6 +240,10 @@ public void testRunImagePropertiesFeature() throws Exception {
public void testRunLabelsFeature() throws Exception {
String folder = "labels";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunLabelsFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(LABELS_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -195,6 +261,10 @@ public void testRunLabelsFeature() throws Exception {
public void testRunLandmarksFeature() throws Exception {
String folder = "landmarks";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunLandmarksFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(LANDMARKS_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -212,6 +282,10 @@ public void testRunLandmarksFeature() throws Exception {
public void testRunLogosFeature() throws Exception {
String folder = "logos";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+
+ createFolder(folder);
+
+ System.out.println("testRunLogosFeature() - resultFolderPath: " + resultFolderPath);
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(LOGOS_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -229,6 +303,10 @@ public void testRunLogosFeature() throws Exception {
public void testRunObjectLocalizationFeature() throws Exception {
String folder = "objects";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunObjectLocalizationFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(OBJECTS_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -246,6 +324,10 @@ public void testRunObjectLocalizationFeature() throws Exception {
public void testRunTextFeature() throws Exception {
String folder = "text";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunTextFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(TEXT_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -264,6 +346,10 @@ public void testRunTextFeature() throws Exception {
public void testRunSafeSearchFeature() throws Exception {
String folder = "safe";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunSafeSearchFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(SAFE_SEARCH_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -281,6 +367,10 @@ public void testRunSafeSearchFeature() throws Exception {
public void testRunWebDetectionFeature() throws Exception {
String folder = "web";
String resultFolderPath = String.format(PATH_PATTERN, PATH, folder);
+ System.out.println("testRunSafeSearchFeature() - resultFolderPath: " + resultFolderPath);
+
+ createFolder(folder);
+
OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG)
.setSourcePath(WEB_DETECTION_IMAGE_FILE_PATH)
.setDestinationPath(resultFolderPath)
@@ -295,9 +385,10 @@ public void testRunWebDetectionFeature() throws Exception {
}
private void validateResult(String folder, String expectedUri) throws InterruptedException {
- Thread.sleep(30000);
-
- Blob blob = bucket.get(String.format(RESULT_PATH_PATTERN, folder, RESULT_FILE_NAME));
+ Thread.sleep(10000); // Wait a bit to make sure the blob is available in the bucket
+ String blobPath = String.format(RESULT_PATH_PATTERN, folder, RESULT_FILE_NAME);
+ System.out.println("validateResult() - blobPath: " + blobPath);
+ Blob blob = bucket.get(blobPath);
Assert.assertTrue(blob.exists());
String content = new String(blob.getContent());
@@ -325,6 +416,10 @@ private static Storage getStorage(String project, @Nullable Credentials credenti
}
private static void deleteBucket(Storage storage, Bucket bucket) {
+ if (bucket == null || bucket.list() == null) {
+ return;
+ }
+ System.out.println("Deleting bucket: " + bucket.getName());
for (Blob blob : bucket.list().iterateAll()) {
storage.delete(blob.getBlobId());
}
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfigTest.java
similarity index 79%
rename from src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfigTest.java
rename to src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfigTest.java
index 498cb83..c54753d 100644
--- a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfigTest.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfigTest.java
@@ -23,11 +23,11 @@
import org.junit.Test;
/**
- * Test class for {@link TextExtractorActionConfigTest}.
+ * Test class for {@link OfflineTextExtractorActionConfigTest}.
*/
-public class TextExtractorActionConfigTest {
+public class OfflineTextExtractorActionConfigTest {
private static final String MOCK_STAGE = "mockStage";
- private static final TextExtractorActionConfig VALID_CONFIG = new TextExtractorActionConfig(
+ private static final OfflineTextExtractorActionConfig VALID_CONFIG = new OfflineTextExtractorActionConfig(
"/path",
"/path",
"/path",
@@ -45,19 +45,19 @@ public void testValidConfig() {
@Test
public void testEmptyServiceFilePath() {
- TextExtractorActionConfig config = TextExtractorActionConfig.builder(VALID_CONFIG)
+ OfflineTextExtractorActionConfig config = OfflineTextExtractorActionConfig.builder(VALID_CONFIG)
.setServiceFilePath("")
.build();
MockFailureCollector failureCollector = new MockFailureCollector(MOCK_STAGE);
config.validate(failureCollector);
ValidationAssertions.assertPropertyValidationFailed(failureCollector,
- CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH);
+ CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH);
}
@Test
public void testEmptySourcePath() {
- TextExtractorActionConfig config = TextExtractorActionConfig.builder(VALID_CONFIG)
+ OfflineTextExtractorActionConfig config = OfflineTextExtractorActionConfig.builder(VALID_CONFIG)
.setSourcePath("")
.build();
@@ -68,7 +68,7 @@ public void testEmptySourcePath() {
@Test
public void testEmptyDestinationPath() {
- TextExtractorActionConfig config = TextExtractorActionConfig.builder(VALID_CONFIG)
+ OfflineTextExtractorActionConfig config = OfflineTextExtractorActionConfig.builder(VALID_CONFIG)
.setDestinationPath("")
.build();
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionTest.java
similarity index 92%
rename from src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionTest.java
rename to src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionTest.java
index 2cf1b52..9b4e62c 100644
--- a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionTest.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionTest.java
@@ -28,20 +28,19 @@
import io.cdap.cdap.etl.api.action.ActionContext;
import io.cdap.cdap.etl.mock.action.MockActionContext;
import io.cdap.plugin.cloud.vision.CredentialsHelper;
-import io.cdap.plugin.cloud.vision.source.GCSPath;
+import io.cdap.plugin.gcp.gcs.GCSPath;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import javax.annotation.Nullable;
/**
- * Test class for {@link TextExtractorAction}.
+ * Test class for {@link OfflineTextExtractorAction}.
*/
-public class TextExtractorActionTest {
+public class OfflineTextExtractorActionTest {
protected static final String PROJECT = System.getProperty("project", "auto-detect");
protected static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", "auto-detect");
protected static final String PATH = System.getProperty("path", "gs://cloud-vision-cdap-text-offline");
@@ -94,7 +93,7 @@ private static void initTestsData() throws FileNotFoundException {
@Test
public void testRun() throws Exception {
- TextExtractorActionConfig config = new TextExtractorActionConfig(
+ OfflineTextExtractorActionConfig config = new OfflineTextExtractorActionConfig(
SERVICE_ACCOUNT_FILE_PATH,
PDF_FILE_PATH,
RESULT_FOLDER_PATH,
@@ -103,7 +102,7 @@ public void testRun() throws Exception {
null
);
- TextExtractorAction action = new TextExtractorAction(config);
+ OfflineTextExtractorAction action = new OfflineTextExtractorAction(config);
ActionContext context = new MockActionContext();
action.run(context);
@@ -144,6 +143,9 @@ private static Storage getStorage(String project, @Nullable Credentials credenti
}
private static void deleteBucket(Storage storage, Bucket bucket) {
+ if (bucket == null || bucket.list() == null) {
+ return;
+ }
for (Blob blob : bucket.list().iterateAll()) {
storage.delete(blob.getBlobId());
}
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java
old mode 100644
new mode 100755
index d34f781..acbeae1
--- a/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java
@@ -20,6 +20,8 @@
/**
* Builder class that provides handy methods to construct {@link ExtractorTransformConfig} for testing.
+ *
+ * @param Generic type.
*/
public abstract class ExtractorTransformConfigBuilder
extends CloudVisionConfigBuilder {
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigBuilder.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigBuilder.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/BaseAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/BaseAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
index cf117a9..63d15ee
--- a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java
@@ -152,9 +152,9 @@ private void assertAnnotationEquals(FaceAnnotation expected, StructuredRecord ac
Assert.assertEquals(expected.getUnderExposedLikelihood().name(),
actual.get(FaceAnnotationSchema.UNDER_EXPOSED_FIELD_NAME));
- List position = actual.get(FaceAnnotationSchema.POSITION_FIELD_NAME);
+ List position = actual.get(FaceAnnotationSchema.BOUNDING_POLY_NAME);
assertPositionEqual(expected.getBoundingPoly(), position);
- List fdPosition = actual.get(FaceAnnotationSchema.FD_POSITION_FIELD_NAME);
+ List fdPosition = actual.get(FaceAnnotationSchema.FD_BOUNDING_POLY_NAME);
assertPositionEqual(expected.getFdBoundingPoly(), fdPosition);
List landmarks = actual.get(FaceAnnotationSchema.LANDMARKS_FIELD_NAME);
assertLandmarksEqual(expected.getLandmarksList(), landmarks);
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
index bf0b9f0..04fe9c9
--- a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java
+++ b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java
@@ -190,7 +190,7 @@ private void assertLanguageEquals(TextAnnotation.DetectedLanguage expected, Stru
actual.get(FullTextAnnotationSchema.DetectedLanguage.CONFIDENCE_FIELD_NAME),
DELTA);
Assert.assertEquals(expected.getLanguageCode(),
- actual.get(FullTextAnnotationSchema.DetectedLanguage.CODE_FIELD_NAME));
+ actual.get(FullTextAnnotationSchema.DetectedLanguage.LANGUAGE_CODE_FIELD_NAME));
}
private void assertBlockEquals(Block expected, StructuredRecord actual) {
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionRecordTransformerTest.java
old mode 100644
new mode 100755
diff --git a/widgets/TextExtractorOffline-action.json b/widgets/OfflineTextExtractor-action.json
similarity index 98%
rename from widgets/TextExtractorOffline-action.json
rename to widgets/OfflineTextExtractor-action.json
index 2571335..f3a7ce8 100644
--- a/widgets/TextExtractorOffline-action.json
+++ b/widgets/OfflineTextExtractor-action.json
@@ -2,7 +2,7 @@
"metadata": {
"spec-version": "1.5"
},
- "display-name": "Text Extractor Offline",
+ "display-name": "Offline Text Extractor",
"configuration-groups": [
{
"label": "Basic",
@@ -33,7 +33,8 @@
"default": "application/pdf",
"values": [
"application/pdf",
- "image/tiff"
+ "image/tiff",
+ "image/gif"
]
}
},