From 00f3a3333de096ea0ff4d7a20487d485a3321010 Mon Sep 17 00:00:00 2001 From: Patrice Borne Date: Thu, 9 Apr 2020 19:28:52 -0400 Subject: [PATCH 1/3] Fixes to Actions and Transfomers --- README.md | 2 +- checkstyle.xml | 31 +- docs/DocumentExtractor-transform.md | 31 +- docs/ImageExtractor-transform.md | 7 +- ...tion.md => OfflineTextExtractor-action.md} | 0 pom.xml | 248 +++++-------- .../cloud/vision/CloudVisionConfig.java | 0 .../cloud/vision/CloudVisionConstants.java | 0 .../cloud/vision/CredentialsHelper.java | 0 .../cloud/vision/action/ActionConstants.java | 23 +- .../cloud/vision/action/GcsBucketHelper.java | 63 ++++ .../action/OfflineImageExtractorAction.java | 118 ++++-- .../OfflineImageExtractorActionConfig.java | 62 +++- .../action/OfflineTextExtractorAction.java | 168 +++++++++ ... => OfflineTextExtractorActionConfig.java} | 66 ++-- .../vision/action/TextExtractorAction.java | 125 ------- .../cloud/vision/action/package-info.java | 20 + .../CloudVisionExecutionException.java | 0 .../cloud/vision/exception/package-info.java | 20 + .../plugin/cloud/vision/package-info.java | 20 + .../vision/transform/CloudVisionClient.java | 0 .../transform/ExtractorTransformConfig.java | 5 +- .../ExtractorTransformConstants.java | 0 .../cloud/vision/transform/ImageFeature.java | 0 .../vision/transform/ProductCategory.java | 0 .../document/DocumentAnnotatorClient.java | 21 ++ .../document/DocumentExtractorTransform.java | 57 ++- .../DocumentExtractorTransformConfig.java | 18 +- .../DocumentExtractorTransformConstants.java | 0 .../transform/document/package-info.java | 20 + .../FileAnnotationToRecordTransformer.java | 0 .../transform/image/ImageAnnotatorClient.java | 9 +- .../image/ImageExtractorTransform.java | 38 +- .../image/ImageExtractorTransformConfig.java | 18 + .../vision/transform/image/package-info.java | 20 + .../cloud/vision/transform/package-info.java | 20 + .../transform/schema/ColorInfoSchema.java | 0 .../schema/CropHintAnnotationSchema.java | 2 +- .../schema/EntityAnnotationSchema.java | 0 .../EntityAnnotationWithPositionSchema.java | 4 +- .../schema/FaceAnnotationSchema.java | 65 +++- .../schema/FullTextAnnotationSchema.java | 69 ++-- .../LocalizedObjectAnnotationSchema.java | 9 +- .../schema/ProductSearchResultsSchema.java | 72 ++-- .../schema/SafeSearchAnnotationSchema.java | 0 .../schema/TextAnnotationSchema.java | 4 +- .../vision/transform/schema/VertexSchema.java | 20 +- .../transform/schema/WebDetectionSchema.java | 30 +- .../vision/transform/schema/package-info.java | 20 + ...opHintsAnnotationsToRecordTransformer.java | 19 +- .../FaceAnnotationsToRecordTransformer.java | 41 ++- ...ullTextAnnotationsToRecordTransformer.java | 66 +++- .../ImageAnnotationToRecordTransformer.java | 19 +- ...pertiesAnnotationsToRecordTransformer.java | 21 ++ .../LabelAnnotationsToRecordTransformer.java | 38 ++ ...andmarkAnnotationsToRecordTransformer.java | 22 ++ ...dObjectAnnotationsToRecordTransformer.java | 22 ++ .../LogoAnnotationsToRecordTransformer.java | 16 +- ...roductSearchResultToRecordTransformer.java | 46 +++ ...eSearchAnnotationsToRecordTransformer.java | 15 + .../TextAnnotationsToRecordTransformer.java | 21 ++ .../transformer/TransformerFactory.java | 4 +- .../WebDetectionToRecordTransformer.java | 47 ++- .../transform/transformer/package-info.java | 20 + .../vision/CloudVisionConfigBuilder.java | 0 .../cloud/vision/GcsBucketHelperTest.java | 74 ++++ .../cloud/vision/ValidationAssertions.java | 0 ...OfflineImageExtractorActionConfigTest.java | 25 +- .../OfflineImageExtractorActionTest.java | 341 +++++++++++------- ...OfflineTextExtractorActionConfigTest.java} | 14 +- ...va => OfflineTextExtractorActionTest.java} | 14 +- .../plugin/cloud/vision/package-info.java | 20 + .../ExtractorTransformConfigBuilder.java | 2 + .../ExtractorTransformConfigTest.java | 0 ...cumentExtractorTransformConfigBuilder.java | 0 .../DocumentExtractorTransformConfigTest.java | 0 .../ImageExtractorTransformConfigBuilder.java | 0 .../ImageExtractorTransformConfigTest.java | 0 ...aseAnnotationsToRecordTransformerTest.java | 0 ...ntsAnnotationsToRecordTransformerTest.java | 0 ...aceAnnotationsToRecordTransformerTest.java | 4 +- ...extAnnotationsToRecordTransformerTest.java | 2 +- ...iesAnnotationsToRecordTransformerTest.java | 0 ...belAnnotationsToRecordTransformerTest.java | 0 ...arkAnnotationsToRecordTransformerTest.java | 0 ...ectAnnotationsToRecordTransformerTest.java | 0 ...ogoAnnotationsToRecordTransformerTest.java | 0 ...ctSearchResultToRecordTransformerTest.java | 0 ...rchAnnotationsToRecordTransformerTest.java | 0 ...extAnnotationsToRecordTransformerTest.java | 0 .../WebDetectionRecordTransformerTest.java | 0 ....json => OfflineTextExtractor-action.json} | 5 +- 92 files changed, 1751 insertions(+), 692 deletions(-) rename docs/{TextExtractorOffline-action.md => OfflineTextExtractor-action.md} (100%) mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConfig.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConstants.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/CredentialsHelper.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java rename src/main/java/io/cdap/plugin/cloud/vision/action/{TextExtractorActionConfig.java => OfflineTextExtractorActionConfig.java} (79%) delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/exception/CloudVisionExecutionException.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/package-info.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/CloudVisionClient.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConstants.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/ImageFeature.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/ProductCategory.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConstants.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/document/transformer/FileAnnotationToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ColorInfoSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/SafeSearchAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java mode change 100644 => 100755 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java create mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/CloudVisionConfigBuilder.java create mode 100644 src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/ValidationAssertions.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java rename src/test/java/io/cdap/plugin/cloud/vision/action/{TextExtractorActionConfigTest.java => OfflineTextExtractorActionConfigTest.java} (79%) rename src/test/java/io/cdap/plugin/cloud/vision/action/{TextExtractorActionTest.java => OfflineTextExtractorActionTest.java} (92%) create mode 100644 src/test/java/io/cdap/plugin/cloud/vision/package-info.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigBuilder.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigBuilder.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/BaseAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformerTest.java mode change 100644 => 100755 src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionRecordTransformerTest.java rename widgets/{TextExtractorOffline-action.json => OfflineTextExtractor-action.json} (98%) diff --git a/README.md b/README.md index 4655a4f..372b014 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Overview -Following plugins are available in this repository. +The following plugins are available in this repository. * Image Extractor Transform * Document Extractor Transform diff --git a/checkstyle.xml b/checkstyle.xml index dca5d9d..2cc4e9f 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -17,7 +17,7 @@ + "http://checkstyle.sourceforge.net/dtds/configuration_1_3.dtd"> - @@ -70,9 +70,6 @@ page at http://checkstyle.sourceforge.net/config.html --> - - - - - + + - - - - - - - - - - - - - - - - + \ No newline at end of file diff --git a/docs/DocumentExtractor-transform.md b/docs/DocumentExtractor-transform.md index 3229f7f..46fa678 100644 --- a/docs/DocumentExtractor-transform.md +++ b/docs/DocumentExtractor-transform.md @@ -2,8 +2,8 @@ Description ----------- -This transform plugin can detect and transcribe text from small(up to 5 pages) PDF and TIFF files stored in Cloud -Storage in an online manner. +This transform plugin uses the Google Cloud Vision API to detect and transcribe text from (up to 5 pages) documents +(.pdf, .tiff, .gif) files stored in Google Cloud Storage. Credentials ----------- @@ -12,8 +12,9 @@ provided and can be set to 'auto-detect'. Credentials will be automatically read from the cluster environment. If the plugin is not run on a Dataproc cluster, the path to a service account key must be provided. -The service account key can be found on the Dashboard in the Cloud Platform Console. -Make sure the account key has permission to access Google Cloud Vision. +The service account key can be found on the Dashboard in the Google Cloud Platform Console. + +Make sure the account key has permission to access the Google Cloud Vision API. The service account key file needs to be available on every node in your cluster and must be readable by all users running the job. @@ -27,22 +28,24 @@ When running on other clusters, the file must be present on every node in the cl **Project ID**: Google Cloud Project ID, which uniquely identifies a project. It can be found on the Dashboard in the Google Cloud Platform Console. -**Path Field**: Field in the input schema containing the path to the image. +**Path Field**: Name of the field in the input schema containing the path to the image. + +**Content Field**: Name of the field in the input schema containing the file content, represented as a stream of bytes. -**Output Field**: Field to store the extracted image features. If the specified output field name already exists in the -input record, it will be overwritten. +Either 'Path Field' or 'Content Field' must be not null. They are mutually exclusive and cannot be both specified. -**Content Field**: Field in the input schema containing the file content, represented as a stream of bytes. +**Output Field**: Name of the field to store the extracted image features into. If the specified output field name +already exists in the input record, it will be overwritten. -**Mime Type**: The type of the file. Currently only 'application/pdf', 'image/tiff' and 'image/gif' are supported. -Wildcards are not supported. +**Mime Type**: The type of the file(s) that will be processed. Currently only 'application/pdf', 'image/tiff' and +'image/gif' are supported. Wildcards are not supported. -**Pages**: The pages in the file to perform image annotation. +**Pages**: The list of pages in the file(s) to perform image annotation on. Enter the list as Comma Separated Values. -**Features**: Features to extract from images. +**Features**: Features to extract from the documents. -**Language Hints**: Hints to detect the language of the text in the images. +**Language Hints**: Hints to detect the language of the text in the documents. **Aspect Ratios**: Ratio of the width to the height of the image. If not specified, the best possible crop is returned. -**Include Geo Results**: Whether to include results derived from the geo information in the image. +**Include Geo Results**: Whether to include results derived from the geo information in the document. diff --git a/docs/ImageExtractor-transform.md b/docs/ImageExtractor-transform.md index 45f6336..049333e 100644 --- a/docs/ImageExtractor-transform.md +++ b/docs/ImageExtractor-transform.md @@ -27,10 +27,11 @@ When running on other clusters, the file must be present on every node in the cl **Project ID**: Google Cloud Project ID, which uniquely identifies a project. It can be found on the Dashboard in the Google Cloud Platform Console. -**Path Field**: Field in the input schema containing the path to the image. +**Path Field**: Name of the field in the input schema containing the path to images in a Goggle Cloud Storage bucket. +Usually this is 'body' when the source is reading a file that contains one path per line. -**Output Field**: Field to store the extracted image features. If the specified output field name already exists in the -input record, it will be overwritten. +**Output Field**: Name of the field to store the extracted image features. If the specified output field name already +exists in the input record, it will be overwritten. **Features**: Features to extract from images. diff --git a/docs/TextExtractorOffline-action.md b/docs/OfflineTextExtractor-action.md similarity index 100% rename from docs/TextExtractorOffline-action.md rename to docs/OfflineTextExtractor-action.md diff --git a/pom.xml b/pom.xml index 62b0f84..01c2d23 100644 --- a/pom.xml +++ b/pom.xml @@ -77,20 +77,17 @@ 3.1.6 6.1.0-SNAPSHOT - 1.12 docs - 1.97.0 - 1.97.0 - 3.7.1 - 2.8.5 - 27.0.1-jre - 2.3.0 + 0.13.2 + 1.99.2 + 1.105.1 + 3.11.4 4.5.9 2.3.0-SNAPSHOT - 2.9.10.1 4.11 - 1.10.19 - 1.49 + UTF-8 + UTF-8 + true @@ -119,172 +116,87 @@ io.thekraken grok + + com.google.guava + guava + + + + - io.cdap.plugin - hydrator-common - ${hydrator.version} + com.google.protobuf + protobuf-java + ${google.protobuf.version} - org.apache.hadoop - hadoop-common - ${hadoop.version} - provided + com.google.cloud + google-cloud-vision + ${google.cloud.vision.version} - commons-logging - commons-logging - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - org.apache.avro - avro + com.google.code.findbugs + jsr305 - org.apache.zookeeper - zookeeper + javax.annotation + javax.annotation-api - com.google.guava - guava + org.apache.hbase + hbase-shaded-client - jersey-core - com.sun.jersey - - - jersey-json - com.sun.jersey - - - jersey-server - com.sun.jersey - - - servlet-api - javax.servlet - - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util - - - jasper-compiler - tomcat - - - jasper-runtime - tomcat - - - jsp-api - javax.servlet.jsp - - - slf4j-api - org.slf4j - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.version} - provided - - - org.slf4j - slf4j-log4j12 + org.apache.hbase + hbase-shaded-server - com.google.inject.extensions - guice-servlet - - - com.sun.jersey - jersey-core + org.apache.avro + avro - com.sun.jersey - jersey-server + com.google.api.grpc + proto-google-cloud-vision-v1p1beta1 - com.sun.jersey - jersey-json + com.google.api.grpc + proto-google-cloud-vision-v1p3beta1 - com.sun.jersey.contribs - jersey-guice + com.google.api.grpc + proto-google-cloud-vision-v1p2beta1 - javax.servlet - servlet-api + com.google.api.grpc + proto-google-cloud-vision-v1p4beta1 com.google.guava guava - com.google.inject - guice + com.google.protobuf + protobuf-java - - com.google.protobuf - protobuf-java - ${google.protobuf.version} - - - com.google.cloud - google-cloud-vision - ${google.cloud.vision.version} - com.google.cloud - google-cloud-vision - ${google.cloud.vision.version} - - - com.google.guava - guava - ${guava.version} - - - com.google.guava - guava - ${guava.version} - - - commons-codec - commons-codec - ${common.codec.version} - - - com.google.code.gson - gson - ${gson.version} + google-cloud-storage + ${google.cloud.storage.version} + + + - com.fasterxml.jackson.core - jackson-databind - ${jackson.core.version} + io.cdap.plugin + google-cloud + ${google.cloud.version} + test - io.cdap.cdap hydrator-test @@ -309,31 +221,16 @@ ${junit.version} test - - org.mockito - mockito-all - ${mockito.version} - test - + org.apache.httpcomponents httpclient ${httpcomponents.version} test - - com.github.tomakehurst - wiremock - ${wiremock.version} - test - - - - - org.apache.maven.plugins @@ -352,7 +249,7 @@ -Xmx3g -Djava.awt.headless=true -XX:MaxPermSize=256m -XX:+UseConcMarkSweepGC -Djava.net.preferIPv4Stack=true - ${surefire.redirectTestOutputToFile} + ${project.surefire.redirectTestOutputToFile} false plain @@ -405,6 +302,7 @@ LICENSE*.txt + **/*.txt *.rst *.md **/*.cdap @@ -435,7 +333,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 2.17 + 3.1.1 validate @@ -482,6 +380,29 @@ + + org.apache.maven.plugins + maven-enforcer-plugin + 3.0.0-M3 + + + enforce-version + + enforce + + + + + + + log4j:log4j:[0.0,1.2.17) + + + + + + + org.apache.felix maven-bundle-plugin @@ -490,7 +411,22 @@ <_exportcontents> - io.cdap.plugin.cloud.vision.* + com.google.api,com.google.api.client.*,com.google.api.core,com.google.api.gax.batching, + com.google.api.gax.core,com.google.api.gax.grpc,com.google.api.gax.longrunning, + com.google.api.gax.paging,com.google.api.gax.retrying,com.google.api.gax.rpc, + com.google.api.gax.tracing,com.google.api.resourcenames, + com.google.api.services.storage.model,com.google.auth.*,com.google.cloud, + com.google.cloud.http,com.google.cloud.spi,com.google.cloud.storage, + com.google.cloud.storage.spi,com.google.cloud.storage.spi.v1,com.google.cloud.vision.v1.*, + com.google.common.base,com.google.common.collect,com.google.common.graph, + com.google.common.hash,com.google.common.io,com.google.common.util.concurrent, + com.google.common.util.concurrent.internal,com.google.errorprone.annotations, + com.google.iam.v1,com.google.longrunning,com.google.longrunning.stub,com.google.protobuf.*, + com.google.rpc,com.google.type,io.cdap.plugin.cloud.vision.*,io.grpc,io.grpc.stub, + io.opencensus.common,io.opencensus.trace,io.opencensus.trace.config, + io.opencensus.trace.export,io.opencensus.trace.propagation, + org.checkerframework.checker.nullness.compatqual,org.checkerframework.checker.nullness.qual, + org.checkerframework.framework.qual,org.threeten.bp,org.threeten.bp.* *;inline=false;scope=compile true diff --git a/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConfig.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/CloudVisionConstants.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/CredentialsHelper.java b/src/main/java/io/cdap/plugin/cloud/vision/CredentialsHelper.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java old mode 100644 new mode 100755 index 3c9e84d..b21bf6a --- a/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java @@ -19,40 +19,47 @@ /** * Cloud Vision Action constants. */ -public class ActionConstants { +public final class ActionConstants { /** * Configuration property name used to specify source path. */ public static final String SOURCE_PATH = "sourcePath"; - /** * Configuration property name used to specify destination path. */ public static final String DESTINATION_PATH = "destinationPath"; - /** * Configuration property name used to specify batch size. */ public static final String BATCH_SIZE = "batchSize"; - /** - * Configuration property name used to specify the features to extract from images. + * Configuration property name used to specify the features to extract + * from images. */ public static final String FEATURES = "features"; - /** * Configuration property name used to specify optional hints. */ public static final String LANGUAGE_HINTS = "languageHints"; - /** * Configuration property name used to specify aspect ratios. */ public static final String ASPECT_RATIOS = "aspectRatios"; - /** * Configuration property name used to specify includeGeoResults. */ public static final String INCLUDE_GEO_RESULTS = "includeGeoResults"; + /** + * Maximum number of Images that can be send at a time to the Cloud Vision API + * + * @see bath information + */ + public static final int MAX_NUMBER_OF_IMAGES_PER_BATCH = 2000; + + /** + * Prevent instantiation. + */ + private ActionConstants() { + } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java b/src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java new file mode 100644 index 0000000..f9b2a77 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/GcsBucketHelper.java @@ -0,0 +1,63 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.cloud.vision.action; + +import com.google.auth.Credentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; +import java.util.ArrayList; +import java.util.List; + + +/** + * Helper class used to retrieve a list of Blobs from a GCSPath. + */ + +public class GcsBucketHelper { + /** + * Helper function that returns a List of Blobs that are found in a given path on GCS. + * + * @param sourceFolderPath + * @param credentials + * @return List of Blobs + */ + public static List getAllFilesInPath(String sourceFolderPath, Credentials credentials) { + List results = new ArrayList<>(); + + Storage storage = StorageOptions.newBuilder().setCredentials(credentials) + .build().getService(); + + // Loop through the buckets + for (Bucket currentBucket : storage.list().iterateAll()) { + // Loop though the blobs and check if the path is what we want, based on sourceFolderPath + for (Blob blob : currentBucket.list().iterateAll()) { + if (blob.getName().endsWith("/")) { // It's a "folder" + continue; // Ignore + } + // Rebuild the full path of the blob + String fullBlobPath = "gs://" + blob.getBucket() + "/" + blob.getName(); + // Is the blob part of the path we have been given? + if (fullBlobPath.startsWith(sourceFolderPath)) { + results.add(blob); + } + } + } + return results; + } +} diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java old mode 100644 new mode 100755 index a8da8a3..ac5e803 --- a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorAction.java @@ -18,6 +18,7 @@ import com.google.api.gax.core.FixedCredentialsProvider; import com.google.auth.Credentials; +import com.google.cloud.storage.Blob; import com.google.cloud.vision.v1.AnnotateImageRequest; import com.google.cloud.vision.v1.AsyncBatchAnnotateImagesRequest; import com.google.cloud.vision.v1.CropHintsParams; @@ -39,23 +40,31 @@ import io.cdap.cdap.etl.api.action.Action; import io.cdap.cdap.etl.api.action.ActionContext; import io.cdap.plugin.cloud.vision.CredentialsHelper; - -import java.util.Arrays; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.List; import javax.annotation.Nullable; +import static io.cdap.plugin.cloud.vision.action.ActionConstants.MAX_NUMBER_OF_IMAGES_PER_BATCH; /** * Action that runs offline image extractor. */ @Plugin(type = Action.PLUGIN_TYPE) -@Name(OfflineImageExtractorAction.NAME) +@Name(OfflineImageExtractorAction.PLUGIN_NAME) @Description("Action that runs offline image extractor.") public class OfflineImageExtractorAction extends Action { - public static final String NAME = "OfflineImageExtractor"; - + public static final String PLUGIN_NAME = "OfflineImageExtractor"; private final OfflineImageExtractorActionConfig config; + private static final Logger LOG = LoggerFactory.getLogger(OfflineImageExtractorAction.class); public OfflineImageExtractorAction(OfflineImageExtractorActionConfig config) { + if (config.getSourcePath() != null) { + config.setSourcePath(config.getSourcePath().trim()); // Remove whitespace + } + if (config.getDestinationPath() != null) { + config.setDestinationPath(config.getDestinationPath().trim()); // Remove whitespace + } this.config = config; } @@ -63,6 +72,7 @@ public OfflineImageExtractorAction(OfflineImageExtractorActionConfig config) { public void configurePipeline(PipelineConfigurer pipelineConfigurer) { FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector(); config.validate(collector); + collector.getOrThrowException(); } @Override @@ -73,53 +83,88 @@ public void run(ActionContext actionContext) throws Exception { Credentials credentials = CredentialsHelper.getCredentials(config.getServiceFilePath()); - ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder() - .setCredentialsProvider(FixedCredentialsProvider.create(credentials)) + // Destination in GCS where the results will be stored + String destinationPath = config.getDestinationPath(); + // Add a '/' at the end if it's not already there + if (!destinationPath.endsWith("/")) { + destinationPath += "/"; + } + GcsDestination gcsDestination = GcsDestination.newBuilder() + .setUri(destinationPath) .build(); - try (ImageAnnotatorClient imageAnnotatorClient = ImageAnnotatorClient.create(imageAnnotatorSettings)) { - - ImageSource source = ImageSource.newBuilder().setImageUri(config.getSourcePath()).build(); - Image image = Image.newBuilder().setSource(source).build(); + OutputConfig outputConfig = OutputConfig.newBuilder() + .setGcsDestination(gcsDestination) + .setBatchSize(config.getBatchSizeValue()) + .build(); - List features = - Arrays.asList(Feature.newBuilder().setType(config.getImageFeature().getFeatureType()).build()); - AnnotateImageRequest.Builder builder = - AnnotateImageRequest.newBuilder().setImage(image).addAllFeatures(features); + ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder() + .setCredentialsProvider(FixedCredentialsProvider.create(credentials)) + .build(); - ImageContext imageContext = getImageContext(); - if (imageContext != null) { - builder.setImageContext(imageContext); - } + // Get all the blobs in the source path + List blobs = GcsBucketHelper.getAllFilesInPath(config.getSourcePath(), credentials); + if (blobs.isEmpty()) { + LOG.warn("Nothing found to process in path: " + config.getSourcePath()); + return; + } - AnnotateImageRequest requestsElement = builder.build(); - List requests = Arrays.asList(requestsElement); - GcsDestination gcsDestination = GcsDestination.newBuilder().setUri(config.getDestinationPath()).build(); + // Prepare the list of requests + List imageRequests = new ArrayList<>(blobs.size()); - // The max number of responses to output in each JSON file - int batchSize = config.getBatchSizeValue(); - OutputConfig outputConfig = - OutputConfig.newBuilder() - .setGcsDestination(gcsDestination) - .setBatchSize(batchSize) - .build(); + // Feature we are going to ask for + Feature feature = Feature.newBuilder() + .setType(config.getImageFeature().getFeatureType()) + .build(); - AsyncBatchAnnotateImagesRequest asyncRequest = - AsyncBatchAnnotateImagesRequest.newBuilder() - .addAllRequests(requests) + try (ImageAnnotatorClient imageAnnotatorClient = ImageAnnotatorClient.create(imageAnnotatorSettings)) { + // Create batches of images to send for processing + // We need to do this because there is a limit on the vision API that will raise an error if there are more + // than MAX_NUMBER_OF_IMAGES_PER_BATCH in a single batch + for (int batchId = 0; + batchId < (1 + blobs.size() / MAX_NUMBER_OF_IMAGES_PER_BATCH); + batchId++) { + for (int index = batchId * MAX_NUMBER_OF_IMAGES_PER_BATCH; + (index < (batchId + 1) * MAX_NUMBER_OF_IMAGES_PER_BATCH) && (index < blobs.size()); + index++) { + Blob blob = blobs.get(index); + // Rebuild the full path of the blob + String fullBlobPath = "gs://" + blob.getBucket() + "/" + blob.getName(); + + ImageSource imageSource = ImageSource.newBuilder().setImageUri(fullBlobPath).build(); + + Image image = Image.newBuilder().setSource(imageSource).build(); + + AnnotateImageRequest.Builder builder = + AnnotateImageRequest.newBuilder().setImage(image).addFeatures(feature); + + ImageContext imageContext = getImageContext(); + if (imageContext != null) { + builder.setImageContext(imageContext); + } + + AnnotateImageRequest annotateImageRequest = builder.build(); + imageRequests.add(annotateImageRequest); + } + + // Send the requests + AsyncBatchAnnotateImagesRequest asyncRequest = AsyncBatchAnnotateImagesRequest.newBuilder() + .addAllRequests(imageRequests) .setOutputConfig(outputConfig) .build(); - imageAnnotatorClient.asyncBatchAnnotateImagesAsync(asyncRequest) - .getInitialFuture() - .get(); + // Wait for the future to complete + imageAnnotatorClient.asyncBatchAnnotateImagesAsync(asyncRequest) + .getInitialFuture() + .get(); + } } catch (Exception exception) { throw new IllegalStateException(exception); } } @Nullable - private ImageContext getImageContext() { + protected ImageContext getImageContext() { switch (config.getImageFeature()) { case TEXT: return Strings.isNullOrEmpty(config.getLanguageHints()) ? null @@ -138,5 +183,4 @@ private ImageContext getImageContext() { return null; } } - } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java old mode 100644 new mode 100755 index 9fa674c..89cddd1 --- a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfig.java @@ -24,7 +24,6 @@ import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.plugin.cloud.vision.CloudVisionConstants; import io.cdap.plugin.cloud.vision.transform.ImageFeature; - import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -33,7 +32,7 @@ import javax.annotation.Nullable; /** - * Configuration for OfflineImageExtractorAction + * Configuration for OfflineImageExtractorAction. */ public class OfflineImageExtractorActionConfig extends PluginConfig { @@ -86,8 +85,17 @@ public OfflineImageExtractorActionConfig(@Nullable String serviceFilePath, Strin @Nullable Boolean includeGeoResults, String sourcePath, String destinationPath, @Nullable String batchSize) { this.serviceFilePath = serviceFilePath; + + if (sourcePath != null) { + sourcePath = sourcePath.trim(); + } this.sourcePath = sourcePath; + + if (destinationPath != null) { + destinationPath = destinationPath.trim(); + } this.destinationPath = destinationPath; + this.features = features; this.batchSize = batchSize; this.languageHints = languageHints; @@ -97,8 +105,8 @@ public OfflineImageExtractorActionConfig(@Nullable String serviceFilePath, Strin private OfflineImageExtractorActionConfig(Builder builder) { serviceFilePath = builder.serviceFilePath; - sourcePath = builder.sourcePath; - destinationPath = builder.destinationPath; + sourcePath = builder.sourcePath.trim(); + destinationPath = builder.destinationPath.trim(); features = builder.features; batchSize = builder.batchSize; languageHints = builder.languageHints; @@ -106,10 +114,21 @@ private OfflineImageExtractorActionConfig(Builder builder) { includeGeoResults = builder.includeGeoResults; } + /** + * Getter to retrieve a Builder object. + * + * @return a Builder object. + */ public static Builder builder() { return new Builder(); } + /** + * Helper function to get a Builder object based on an existing configuration. + * + * @param copy Configuration object to use as the source to copy from. + * @return Builer object. + */ public static Builder builder(OfflineImageExtractorActionConfig copy) { return builder() .setServiceFilePath(copy.getServiceFilePath()) @@ -175,6 +194,12 @@ public boolean getIncludeGeoResults() { return includeGeoResults != null ? includeGeoResults : false; } + /** + * Slit a comma separated list of properties and turn them into a List. + * + * @param property Comma separated list of properties. + * @return A list of strings after splitting on commas. + */ private List convertPropertyToList(String property) { if (!Strings.isNullOrEmpty(property)) { return Arrays.asList(property.split(",")); @@ -183,6 +208,11 @@ private List convertPropertyToList(String property) { } } + /** + * Validate that the configuration is correct. If not, use the FailureCollector object passed to report errors. + * + * @param collector FailureCollector object to use to report errors. + */ public void validate(FailureCollector collector) { ImageFeature feature = getImageFeature(); if (feature == null) { @@ -196,7 +226,7 @@ public void validate(FailureCollector collector) { batch = Integer.parseInt(batchSize); } catch (NumberFormatException e) { collector.addFailure(String.format("Incorrect value '%s' for Batch Size.", batchSize), - "Provide correct value.") + "Provide correct value.") .withConfigProperty(ActionConstants.BATCH_SIZE); return; } @@ -219,8 +249,16 @@ public void validate(FailureCollector collector) { } } + public void setSourcePath(String sourcePath) { + this.sourcePath = sourcePath; + } + + public void setDestinationPath(String destinationPath) { + this.destinationPath = destinationPath; + } + /** - * Builder for creating a {@link OfflineImageExtractorActionConfig} + * Builder for creating a {@link OfflineImageExtractorActionConfig}. */ public static final class Builder { @Nullable @@ -241,12 +279,20 @@ private Builder() { } public Builder setSourcePath(String sourcePath) { - this.sourcePath = sourcePath; + if (sourcePath != null) { + this.sourcePath = sourcePath.trim(); + } else { + this.sourcePath = sourcePath; + } return this; } public Builder setDestinationPath(String destinationPath) { - this.destinationPath = destinationPath; + if (destinationPath != null) { + this.destinationPath = destinationPath.trim(); + } else { + this.destinationPath = destinationPath; + } return this; } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java new file mode 100644 index 0000000..6d9ea7a --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorAction.java @@ -0,0 +1,168 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.cloud.vision.action; + +import com.google.api.gax.core.FixedCredentialsProvider; +import com.google.auth.Credentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.vision.v1.AsyncAnnotateFileRequest; +import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesRequest; +import com.google.cloud.vision.v1.Feature; +import com.google.cloud.vision.v1.GcsDestination; +import com.google.cloud.vision.v1.GcsSource; +import com.google.cloud.vision.v1.ImageAnnotatorClient; +import com.google.cloud.vision.v1.ImageAnnotatorSettings; +import com.google.cloud.vision.v1.ImageContext; +import com.google.cloud.vision.v1.InputConfig; +import com.google.cloud.vision.v1.OutputConfig; +import io.cdap.cdap.api.annotation.Description; +import io.cdap.cdap.api.annotation.Name; +import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.etl.api.FailureCollector; +import io.cdap.cdap.etl.api.PipelineConfigurer; +import io.cdap.cdap.etl.api.action.Action; +import io.cdap.cdap.etl.api.action.ActionContext; +import io.cdap.plugin.cloud.vision.CredentialsHelper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; +import static io.cdap.plugin.cloud.vision.action.ActionConstants.MAX_NUMBER_OF_IMAGES_PER_BATCH; + +/** + * Action that runs offline document text extractor. + */ +@Plugin(type = Action.PLUGIN_TYPE) +@Name(OfflineTextExtractorAction.PLUGIN_NAME) +@Description("Action that runs offline document text extractor") +public class OfflineTextExtractorAction extends Action { + public static final String PLUGIN_NAME = "OfflineTextExtractor"; + private final OfflineTextExtractorActionConfig config; + private static final Logger LOG = LoggerFactory.getLogger(OfflineTextExtractorAction.class); + + public OfflineTextExtractorAction(OfflineTextExtractorActionConfig config) { + if (config.getSourcePath() != null) { + config.setSourcePath(config.getSourcePath().trim()); // Remove whitespace + } + if (config.getDestinationPath() != null) { + config.setDestinationPath(config.getDestinationPath().trim()); + } + this.config = config; + } + + @Override + public void configurePipeline(PipelineConfigurer pipelineConfigurer) { + FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector(); + config.validate(collector); + collector.getOrThrowException(); + } + + @Override + public void run(ActionContext actionContext) throws Exception { + FailureCollector collector = actionContext.getFailureCollector(); + config.validate(collector); + collector.getOrThrowException(); + + Credentials credentials = CredentialsHelper.getCredentials(config.getServiceAccountFilePath()); + + // Destination in GCS where the results will be stored + String destinationPath = config.getDestinationPath(); + // Add a / at the end if it's not already there + if (!destinationPath.endsWith("/")) { + destinationPath += "/"; + } + GcsDestination gcsDestination = GcsDestination.newBuilder() + .setUri(destinationPath) + .build(); + + OutputConfig outputConfig = OutputConfig.newBuilder() + .setBatchSize(config.getBatchSize()) + .setGcsDestination(gcsDestination) + .build(); + + ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder() + .setCredentialsProvider(FixedCredentialsProvider.create(credentials)) + .build(); + + // Get all the blobs in the source path + List blobs = GcsBucketHelper.getAllFilesInPath(config.getSourcePath(), credentials); + if (blobs.isEmpty()) { + LOG.warn("Nothing found to process in path: " + config.getSourcePath()); + return; + } + + // Prepare the list of requests + List requests = new ArrayList<>(blobs.size()); + + // Feature we are going to ask for + Feature feature = Feature.newBuilder() + .setType(Feature.Type.DOCUMENT_TEXT_DETECTION) + .build(); + + try (ImageAnnotatorClient client = ImageAnnotatorClient.create(imageAnnotatorSettings)) { + // Create batches of images to send for processing + // We need to do this because there is a limit on the vision API that will raise an error if there are more + // than MAX_NUMBER_OF_IMAGES_PER_BATCH in a single batch + for (int batchId = 0; + batchId < (1 + blobs.size() / MAX_NUMBER_OF_IMAGES_PER_BATCH); + batchId++) { + for (int index = batchId * MAX_NUMBER_OF_IMAGES_PER_BATCH; + (index < (batchId + 1) * MAX_NUMBER_OF_IMAGES_PER_BATCH) && (index < blobs.size()); + index++) { + Blob blob = blobs.get(index); + // Rebuild the full path of the blob + String fullBlobPath = "gs://" + blob.getBucket() + "/" + blob.getName(); + + GcsSource gcsSource = GcsSource.newBuilder() + .setUri(fullBlobPath) + .build(); + + InputConfig inputConfig = InputConfig.newBuilder() + .setMimeType(config.getMimeType()) + .setGcsSource(gcsSource) + .build(); + + ImageContext context = ImageContext.newBuilder() + .addAllLanguageHints(config.getLanguageHintsList()) + .build(); + + AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder() + .addFeatures(feature) + .setImageContext(context) + .setInputConfig(inputConfig) + .setOutputConfig(outputConfig) + .build(); + + // Add this request to the list + requests.add(request); + } + + // Send the requests + AsyncBatchAnnotateFilesRequest asyncRequest = AsyncBatchAnnotateFilesRequest.newBuilder() + .addAllRequests(requests) + .build(); + + // Wait for the future to complete + client.asyncBatchAnnotateFilesAsync(asyncRequest) + .getInitialFuture() + .get(); + } + } catch (Exception exception) { + throw new IllegalStateException(exception); + } + } +} diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfig.java similarity index 79% rename from src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfig.java rename to src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfig.java index cdebcef..16f97c2 100644 --- a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfig.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfig.java @@ -23,49 +23,44 @@ import io.cdap.cdap.api.plugin.PluginConfig; import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.plugin.cloud.vision.CloudVisionConstants; - import java.util.Arrays; import java.util.Collections; import java.util.List; import javax.annotation.Nullable; /** - * Config class for {@link TextExtractorAction}. + * Config class for {@link OfflineTextExtractorAction}. */ -public class TextExtractorActionConfig extends PluginConfig { - - @Name(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) - @Description("Path on the local file system of the service account key used " - + "for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. " - + "When running on other clusters, the file must be present on every node in the cluster.") - @Macro - private String serviceFilePath; +public class OfflineTextExtractorActionConfig extends PluginConfig { @Name(ActionConstants.SOURCE_PATH) @Macro @Description("Path to the location of the directory on GCS where the input files are stored.") - private final String sourcePath; - + private String sourcePath; @Name(ActionConstants.DESTINATION_PATH) @Macro @Description("Path to the location of the directory on GCS where output files should be stored.") - private final String destinationPath; - + private String destinationPath; @Name(CloudVisionConstants.MIME_TYPE) @Description("Document type.") private final String mimeType; - @Name(ActionConstants.BATCH_SIZE) @Description("The max number of responses.") private final Integer batchSize; - @Name(CloudVisionConstants.LANGUAGE_HINTS) @Nullable @Description("Optional hints to provide to Cloud Vision API.") private final String languageHints; + @Name(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) + @Description("Path on the local file system of the service account key used " + + "for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. " + + "When running on other clusters, the file must be present on every node in the cluster.") + @Macro + private String serviceFilePath; - public TextExtractorActionConfig(String serviceFilePath, String sourcePath, String destinationPath, String mimeType, - Integer batchSize, @Nullable String languageHints) { + public OfflineTextExtractorActionConfig(String serviceFilePath, String sourcePath, + String destinationPath, String mimeType, + Integer batchSize, @Nullable String languageHints) { this.serviceFilePath = serviceFilePath; this.sourcePath = sourcePath; this.destinationPath = destinationPath; @@ -74,7 +69,7 @@ public TextExtractorActionConfig(String serviceFilePath, String sourcePath, Stri this.languageHints = languageHints; } - private TextExtractorActionConfig(Builder builder) { + private OfflineTextExtractorActionConfig(Builder builder) { serviceFilePath = builder.serviceFilePath; sourcePath = builder.sourcePath; destinationPath = builder.destinationPath; @@ -87,7 +82,13 @@ public static Builder builder() { return new Builder(); } - public static Builder builder(TextExtractorActionConfig copy) { + /** + * Helper function to get a Builder object based on an existing configuration. + * + * @param copy Configuration object to use as the source to copy from. + * @return Builer object. + */ + public static Builder builder(OfflineTextExtractorActionConfig copy) { Builder builder = new Builder(); builder.setServiceFilePath(copy.getServiceFilePath()); @@ -106,7 +107,8 @@ public String getServiceFilePath() { @Nullable public String getServiceAccountFilePath() { - if (containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) || Strings.isNullOrEmpty(serviceFilePath)) { + if (containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) + || Strings.isNullOrEmpty(serviceFilePath)) { return null; } @@ -142,8 +144,22 @@ public List getLanguageHintsList() { return Collections.emptyList(); } + public void setSourcePath(String sourcePath) { + this.sourcePath = sourcePath; + } + + public void setDestinationPath(String destinationPath) { + this.destinationPath = destinationPath; + } + + /** + * Validate that the configuration is correct. If not, use the FailureCollector object passed to report errors. + * + * @param collector FailureCollector object to use to report errors. + */ public void validate(FailureCollector collector) { - if (!containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) && Strings.isNullOrEmpty(serviceFilePath)) { + if (!containsMacro(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH) + && Strings.isNullOrEmpty(serviceFilePath)) { collector.addFailure("Service account file path must be specified.", null) .withConfigProperty(CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH); } @@ -160,7 +176,7 @@ public void validate(FailureCollector collector) { } /** - * Builder for creating a {@link TextExtractorActionConfig}. + * Builder for creating a {@link OfflineTextExtractorActionConfig}. */ public static final class Builder { private String serviceFilePath; @@ -205,8 +221,8 @@ public Builder setLanguageHints(@Nullable String languageHints) { return this; } - public TextExtractorActionConfig build() { - return new TextExtractorActionConfig(this); + public OfflineTextExtractorActionConfig build() { + return new OfflineTextExtractorActionConfig(this); } } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java b/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java deleted file mode 100644 index 79c93a9..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/action/TextExtractorAction.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package io.cdap.plugin.cloud.vision.action; - -import com.google.api.gax.core.FixedCredentialsProvider; -import com.google.auth.Credentials; -import com.google.cloud.vision.v1.AsyncAnnotateFileRequest; -import com.google.cloud.vision.v1.AsyncBatchAnnotateFilesRequest; -import com.google.cloud.vision.v1.Feature; -import com.google.cloud.vision.v1.GcsDestination; -import com.google.cloud.vision.v1.GcsSource; -import com.google.cloud.vision.v1.ImageAnnotatorClient; -import com.google.cloud.vision.v1.ImageAnnotatorSettings; -import com.google.cloud.vision.v1.ImageContext; -import com.google.cloud.vision.v1.InputConfig; -import com.google.cloud.vision.v1.OutputConfig; -import io.cdap.cdap.api.annotation.Description; -import io.cdap.cdap.api.annotation.Name; -import io.cdap.cdap.api.annotation.Plugin; -import io.cdap.cdap.etl.api.FailureCollector; -import io.cdap.cdap.etl.api.PipelineConfigurer; -import io.cdap.cdap.etl.api.action.Action; -import io.cdap.cdap.etl.api.action.ActionContext; -import io.cdap.plugin.cloud.vision.CredentialsHelper; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.List; - -/** - * Action that runs offline document text extractor. - */ -@Plugin(type = Action.PLUGIN_TYPE) -@Name(TextExtractorAction.PLUGIN_NAME) -@Description("Action that runs offline document text extractor") -public class TextExtractorAction extends Action { - public static final String PLUGIN_NAME = "TextExtractorOffline"; - private static final Logger LOG = LoggerFactory.getLogger(TextExtractorAction.class); - - private final TextExtractorActionConfig config; - - public TextExtractorAction(TextExtractorActionConfig config) { - this.config = config; - } - - @Override - public void configurePipeline(PipelineConfigurer pipelineConfigurer) { - FailureCollector collector = pipelineConfigurer.getStageConfigurer().getFailureCollector(); - config.validate(collector); - collector.getOrThrowException(); - } - - @Override - public void run(ActionContext actionContext) throws Exception { - FailureCollector collector = actionContext.getFailureCollector(); - config.validate(collector); - collector.getOrThrowException(); - - Credentials credentials = CredentialsHelper.getCredentials(config.getServiceAccountFilePath()); - ImageAnnotatorSettings imageAnnotatorSettings = ImageAnnotatorSettings.newBuilder() - .setCredentialsProvider(FixedCredentialsProvider.create(credentials)) - .build(); - - try (ImageAnnotatorClient client = ImageAnnotatorClient.create(imageAnnotatorSettings)) { - List requests = new ArrayList<>(); - - GcsSource gcsSource = GcsSource.newBuilder() - .setUri(config.getSourcePath()) - .build(); - - GcsDestination gcsDestination = GcsDestination.newBuilder() - .setUri(config.getDestinationPath()) - .build(); - - InputConfig inputConfig = InputConfig.newBuilder() - .setMimeType(config.getMimeType()) - .setGcsSource(gcsSource) - .build(); - - OutputConfig outputConfig = OutputConfig.newBuilder() - .setBatchSize(config.getBatchSize()) - .setGcsDestination(gcsDestination) - .build(); - - Feature feature = Feature.newBuilder() - .setType(Feature.Type.DOCUMENT_TEXT_DETECTION) - .build(); - - ImageContext context = ImageContext.newBuilder() - .addAllLanguageHints(config.getLanguageHintsList()) - .build(); - - AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder() - .addFeatures(feature) - .setImageContext(context) - .setInputConfig(inputConfig) - .setOutputConfig(outputConfig) - .build(); - - requests.add(request); - AsyncBatchAnnotateFilesRequest asyncRequest = AsyncBatchAnnotateFilesRequest.newBuilder() - .addAllRequests(requests) - .build(); - - client.asyncBatchAnnotateFilesAsync(asyncRequest) - .getInitialFuture() - .get(); - } - } -} diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java new file mode 100644 index 0000000..2718b39 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Actions to build CDAP pipelines that use the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision.action; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/exception/CloudVisionExecutionException.java b/src/main/java/io/cdap/plugin/cloud/vision/exception/CloudVisionExecutionException.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java new file mode 100644 index 0000000..4854c8e --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Exception when there is a problem with using the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision.exception; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/package-info.java new file mode 100644 index 0000000..5baad12 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Transformers and Actions to build CDAP pipelines that use the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/CloudVisionClient.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/CloudVisionClient.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java old mode 100644 new mode 100755 index 4f4d849..de98b16 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java @@ -48,7 +48,6 @@ public class ExtractorTransformConfig extends CloudVisionConfig { @Name(ExtractorTransformConstants.OUTPUT_FIELD) @Description("Field to store the extracted image features. If the specified output field name already exists " + "in the input record, it will be overwritten.") - @Macro private String outputField; @Name(ExtractorTransformConstants.FEATURES) @@ -200,8 +199,8 @@ public static void validateFieldsMatch(Schema inferredSchema, Schema providedSch Schema inferredFieldNonNullableSchema = isInferredFieldNullable ? inferredFieldSchema.getNonNullable() : inferredFieldSchema; - Schema providedFieldNonNullableSchema = isProvidedFieldNullable ? - providedFieldSchema.getNonNullable() : providedFieldSchema; + Schema providedFieldNonNullableSchema = isProvidedFieldNullable + ? providedFieldSchema.getNonNullable() : providedFieldSchema; Schema.Type inferredType = inferredFieldNonNullableSchema.getType(); Schema.LogicalType inferredLogicalType = inferredFieldNonNullableSchema.getLogicalType(); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConstants.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ImageFeature.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ImageFeature.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ProductCategory.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ProductCategory.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java old mode 100644 new mode 100755 index 607e44a..d2fc144 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentAnnotatorClient.java @@ -44,6 +44,14 @@ public DocumentAnnotatorClient(DocumentExtractorTransformConfig config) { this.config = config; } + /** + * Convenience method that wraps a call to extractDocumentFeature(InputConfig inputConfig) by building an + * {@link InputConfig} object from a Byte array containing the image bytes. + * + * @param content Byte array containing the image bytes to ask the cloud vision API to work on. + * @return {@link AnnotateFileResponse} with the information requested from the cloud vision API. + * @throws Exception if there was an error sent back by the cloud vision API. + */ public AnnotateFileResponse extractDocumentFeature(byte[] content) throws Exception { InputConfig inputConfig = InputConfig.newBuilder() .setContent(ByteString.copyFrom(content)) @@ -52,6 +60,14 @@ public AnnotateFileResponse extractDocumentFeature(byte[] content) throws Except return extractDocumentFeature(inputConfig); } + /** + * Convenience method that wraps a call to extractDocumentFeature(InputConfig inputConfig) by building an + * {@link InputConfig} object from a gcsPath pointing to a blob in GCS. + * + * @param gcsPath Path to the blob to ask the cloud vision API to work on. + * @return {@link AnnotateFileResponse} with the information requested from the cloud vision API. + * @throws Exception if there was an error sent back by the cloud vision API. + */ public AnnotateFileResponse extractDocumentFeature(String gcsPath) throws Exception { InputConfig inputConfig = InputConfig.newBuilder() .setGcsSource(GcsSource.newBuilder().setUri(gcsPath)) @@ -60,6 +76,11 @@ public AnnotateFileResponse extractDocumentFeature(String gcsPath) throws Except return extractDocumentFeature(inputConfig); } + /** + * @param inputConfig Object that contains the information to send to the Cloud vision API. + * @return {@link AnnotateFileResponse} with the information requested from the cloud vision API. + * @throws Exception if there was an error sent back by the cloud vision API. + */ public AnnotateFileResponse extractDocumentFeature(InputConfig inputConfig) throws Exception { try (ImageAnnotatorClient client = createImageAnnotatorClient()) { Feature.Type featureType = config.getImageFeature().getFeatureType(); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java old mode 100644 new mode 100755 index b8346c1..ca6771e --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransform.java @@ -57,34 +57,26 @@ public class DocumentExtractorTransform extends Transform emitter) { try { + // There are two ways the cloud vision API can be called. + // 1. By providing the path to a blob in GCS. + // 2. By providing the actual bytes of the image file. if (!Strings.isNullOrEmpty(config.getPathField())) { transformPath(input, emitter); } else { @@ -118,6 +113,13 @@ public void transform(StructuredRecord input, Emitter emitter) } } + /** + * Method that gets a response back from the cloud vision API by providing the path to an image blob in GCS. + * + * @param input {@link StructuredRecord} passed in by CDAP to work with. It contains the actual path to use. + * @param emitter {@link Emitter} object to use to send the response back to CDAP + * @throws Exception Raised if there was an error coming back from the cloud vision API. + */ private void transformPath(StructuredRecord input, Emitter emitter) throws Exception { String documentPath = input.get(config.getPathField()); AnnotateFileResponse response = documentAnnotatorClient.extractDocumentFeature(documentPath); @@ -125,6 +127,13 @@ private void transformPath(StructuredRecord input, Emitter emi emitter.emit(transformed); } + /** + * Method that gets a response back from the cloud vision API by providing the actual bytes of the image file. + * + * @param input {@link StructuredRecord} passed in by CDAP to work with. It contains the actual bytes to use. + * @param emitter {@link Emitter} object to use to send the response back to CDAP + * @throws Exception Raised if there was an error coming back from the cloud vision API. + */ private void transformBytes(StructuredRecord input, Emitter emitter) throws Exception { Object content = input.get(config.getPathField()); byte[] contentBytes = content instanceof ByteBuffer ? Bytes.getBytes((ByteBuffer) content) : (byte[]) content; @@ -133,14 +142,22 @@ private void transformBytes(StructuredRecord input, Emitter em emitter.emit(transformed); } - public Schema getSchema() { + /** + * Get the output Schema to use by combining the input Schema from CDAP and add the fields needed to store the + * information coming back from the cloud vision API. + * + * @return {@link Schema} + */ + public Schema getOutputSchema() { List fields = new ArrayList<>(); - if (inputSchema.getFields() != null) { + // Add the input fields + if (inputSchema != null && inputSchema.getFields() != null) { fields.addAll(inputSchema.getFields()); } - + // Add the fields of the image feature schema Schema pagesSchema = pagesSchema(config.getImageFeature().getSchema()); fields.add(Schema.Field.of(config.getOutputField(), pagesSchema)); + // Build a schema combining all return Schema.recordOf("record", fields); } @@ -148,7 +165,7 @@ public Schema getSchema() { * File Annotation mapped to record with field "page" for page number and "feature" field for extracted image feature. * * @param imageFeatureSchema extracted image feature schema. - * @return File Annotation page schema. + * @return File Annotation page {@link Schema}. */ private Schema pagesSchema(Schema imageFeatureSchema) { return Schema.arrayOf( diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java old mode 100644 new mode 100755 index 1cd879b..ed4e753 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfig.java @@ -78,6 +78,11 @@ public String getPages() { return pages; } + /** + * Convenience method that splits a comma separated string of values and turn those into a List. + * + * @return {@link List} + */ public List getPagesList() { if (Strings.isNullOrEmpty(pages)) { return Collections.emptyList(); @@ -117,14 +122,17 @@ public void validate(FailureCollector collector) { */ public void validateInputSchema(Schema inputSchema, FailureCollector collector) { Schema.Field contentField = inputSchema.getField(getContentField()); - if (contentField != null) { - collector.addFailure(String.format("Content field '%s' is expected to be 'bytes'", getContentField()), null) - .withInputSchemaField(getContentField()); - } Schema.Field pathField = inputSchema.getField(getPathField()); - if (pathField != null) { + + if (pathField != null && pathField.getSchema().getType() != Schema.Type.STRING) { collector.addFailure(String.format("Path field '%s' is expected to be a string", getPathField()), null) .withInputSchemaField(getPathField()); + return; + } + + if (contentField != null && contentField.getSchema().getType() != Schema.Type.STRING) { + collector.addFailure(String.format("Content field '%s' is expected to be a string", getContentField()), null) + .withInputSchemaField(getContentField()); } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConstants.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java new file mode 100644 index 0000000..023e20a --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Contains classes to implement the Document Extraction Transform using the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision.transform.document; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/transformer/FileAnnotationToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/transformer/FileAnnotationToRecordTransformer.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java old mode 100644 new mode 100755 index f345b7b..87d0052 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageAnnotatorClient.java @@ -43,12 +43,19 @@ public ImageAnnotatorClient(ImageExtractorTransformConfig config) { this.config = config; } + /** + * @param gcsPath {@link String} that contains the path to a blon in GCS to use with the Cloud vision API. + * @return {@link AnnotateImageResponse} with the information requested from the cloud vision API. + * @throws Exception if there was an error sent back by the cloud vision API. + */ public AnnotateImageResponse extractImageFeature(String gcsPath) throws Exception { try (com.google.cloud.vision.v1.ImageAnnotatorClient client = createImageAnnotatorClient()) { ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build(); Image img = Image.newBuilder().setSource(imgSource).build(); + Feature.Type featureType = config.getImageFeature().getFeatureType(); Feature feature = Feature.newBuilder().setType(featureType).build(); + AnnotateImageRequest.Builder request = AnnotateImageRequest.newBuilder().addFeatures(feature).setImage(img); ImageContext imageContext = getImageContext(); if (imageContext != null) { @@ -56,13 +63,13 @@ public AnnotateImageResponse extractImageFeature(String gcsPath) throws Exceptio } BatchAnnotateImagesResponse response = client.batchAnnotateImages(Collections.singletonList(request.build())); + AnnotateImageResponse annotateImageResponse = response.getResponses(SINGLE_RESPONSE_INDEX); if (annotateImageResponse.hasError()) { String errorMessage = String.format("Unable to extract '%s' feature of image '%s' due to: '%s'", featureType, gcsPath, annotateImageResponse.getError().getMessage()); throw new CloudVisionExecutionException(errorMessage); } - return annotateImageResponse; } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java old mode 100644 new mode 100755 index 0c97575..14d35e6 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransform.java @@ -55,27 +55,21 @@ public class ImageExtractorTransform extends Transform emitter) } } - public Schema getSchema() { + /** + * Get the output Schema to use by combining the input Schema from CDAP and add the fields needed to store the + * information coming back from the cloud vision API. + * + * @return {@link Schema} + */ + protected Schema getOutputSchema(Schema inputSchema) { List fields = new ArrayList<>(); - if (inputSchema.getFields() != null) { + // Add the input fields + if (inputSchema != null && inputSchema.getFields() != null) { fields.addAll(inputSchema.getFields()); } - + // Add the fields of the image feature schema fields.add(Schema.Field.of(config.getOutputField(), config.getImageFeature().getSchema())); + // Build a schema combining all return Schema.recordOf("record", fields); } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java old mode 100644 new mode 100755 index fddb321..b344b36 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfig.java @@ -23,6 +23,7 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Macro; import io.cdap.cdap.api.annotation.Name; +import io.cdap.cdap.api.data.schema.Schema; import io.cdap.cdap.api.plugin.PluginConfig; import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.plugin.cloud.vision.transform.ExtractorTransformConfig; @@ -133,4 +134,21 @@ public void validate(FailureCollector collector) { } } } + + /** + * Validates input schema and checks for type compatibility. + * + * @param inputSchema input schema. + * @param collector failure collector. + */ + public void validateInputSchema(Schema inputSchema, FailureCollector collector) { + Schema.Field pathField = inputSchema.getField(getPathField()); + if (pathField != null && !(pathField.getSchema().getType() == Schema.Type.STRING)) { + collector.addFailure( + String.format("Path field '%s' is expected to be a string", getPathField()), + null).withInputSchemaField(getPathField()); + } + } + + } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java new file mode 100644 index 0000000..fdf0d18 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Contains classes to implement the Image Extraction Transform using the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision.transform.image; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java new file mode 100644 index 0000000..b079416 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Transformers to build CDAP pipelines that use the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision.transform; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ColorInfoSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ColorInfoSchema.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java old mode 100644 new mode 100755 index 9aa356b..38334bb --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/CropHintAnnotationSchema.java @@ -45,7 +45,7 @@ private CropHintAnnotationSchema() { public static final Schema SCHEMA = Schema.recordOf( "crop-hint-annotation-component-record", - Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)), + Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.getSchema("cropHintAnnotation-position"))), Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(IMPORTANCE_FRACTION_FIELD_NAME, Schema.of(Schema.Type.FLOAT))); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationSchema.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java old mode 100644 new mode 100755 index 053b9a8..ed657a5 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/EntityAnnotationWithPositionSchema.java @@ -40,7 +40,9 @@ private EntityAnnotationWithPositionSchema() { Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(TOPICALITY_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(LOCATIONS_FIELD_NAME, Schema.arrayOf(LocationInfo.SCHEMA)), - Schema.Field.of(POSITION_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(VertexSchema.SCHEMA))), + Schema.Field.of(POSITION_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + VertexSchema.getSchema("entityAnnotation-position")))), Schema.Field.of(PROPERTIES_FIELD_NAME, Schema.arrayOf(Property.SCHEMA)) ); + } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java old mode 100644 new mode 100755 index 3965d42..9cb6024 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FaceAnnotationSchema.java @@ -16,8 +16,11 @@ package io.cdap.plugin.cloud.vision.transform.schema; +import com.google.cloud.vision.v1.FaceAnnotation; import com.google.cloud.vision.v1.Likelihood; import io.cdap.cdap.api.data.schema.Schema; +import java.util.ArrayList; +import java.util.List; /** * {@link com.google.cloud.vision.v1.FaceAnnotation} mapped to a record with following fields. @@ -59,52 +62,54 @@ private FaceAnnotationSchema() { /** * Anger likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String ANGER_FIELD_NAME = "anger"; + public static final String ANGER_FIELD_NAME = "angerLikelihood"; /** * Joy likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String JOY_FIELD_NAME = "joy"; + public static final String JOY_FIELD_NAME = "joyLikelihood"; /** * Surprise likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String SURPRISE_FIELD_NAME = "surprise"; + public static final String SURPRISE_FIELD_NAME = "surpriseLikelihood"; /** * Blurred likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String BLURRED_FIELD_NAME = "blurred"; + public static final String BLURRED_FIELD_NAME = "blurredLikelihood"; /** * Under exposed likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String UNDER_EXPOSED_FIELD_NAME = "underExposed"; + public static final String UNDER_EXPOSED_FIELD_NAME = "underExposedLikelihood"; /** * Sorrow likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String SORROW_FIELD_NAME = "sorrow"; + public static final String SORROW_FIELD_NAME = "sorrowLikelihood"; /** * Headwear likelihood. Possible values are defined by {@link Likelihood}. */ - public static final String HEADWEAR_FIELD_NAME = "headwear"; + public static final String HEADWEAR_FIELD_NAME = "headwearLikelihood"; /** - * The bounding polygon around the face. The bounding box is computed to "frame" the face in accordance with human - * expectations. It is based on the landmarker results. Note that one or more x and/or y coordinates may not be - * generated if only a partial face appears in the image to be annotated. + * The bounding polygon around the face. The bounding box is computed to "frame" the face in + * accordance with human expectations. It is based on the landmarker results. Note that one or + * more x and/or y coordinates may not be generated if only a partial face appears in the image + * to be annotated. */ - public static final String POSITION_FIELD_NAME = "position"; + public static final String BOUNDING_POLY_NAME = "boundingPoly"; /** - * The bounding polygon which is tighter than the {@link FaceAnnotationSchema#POSITION_FIELD_NAME}, and encloses only - * the skin part of the face. Typically, it is used to eliminate the face from any image analysis that detects the - * "amount of skin" visible in an image. It is not based on the landmarker results, only on the initial face - * detection, hence the fd (face detection) prefix. + * The bounding polygon, tighter than the {@link FaceAnnotationSchema#BOUNDING_POLY_NAME}, + * encloses only the skin part of the face. Typically, it is used to eliminate the face from any + * image analysis that detects the "amount of skin" visible in an image. It is not based on the + * landmarker results, only on the initial face detection, hence the fd + * (face detection) prefix. */ - public static final String FD_POSITION_FIELD_NAME = "fdPosition"; + public static final String FD_BOUNDING_POLY_NAME = "fdBoundingPoly"; /** * Detected face landmarks. @@ -125,8 +130,8 @@ private FaceAnnotationSchema() { Schema.Field.of(UNDER_EXPOSED_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(SORROW_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(HEADWEAR_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)), - Schema.Field.of(FD_POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)), + Schema.Field.of(BOUNDING_POLY_NAME, Schema.arrayOf(VertexSchema.getSchema("bounding-vertex"))), + Schema.Field.of(FD_BOUNDING_POLY_NAME, Schema.arrayOf(VertexSchema.getSchema("fd-bounding-vertex"))), Schema.Field.of(LANDMARKS_FIELD_NAME, Schema.arrayOf(FaceLandmark.SCHEMA)) ); @@ -135,6 +140,11 @@ private FaceAnnotationSchema() { */ public static class FaceLandmark { + + private FaceLandmark() { + throw new AssertionError("Should not instantiate static utility class."); + } + /** * Face landmark type. */ @@ -156,9 +166,26 @@ public static class FaceLandmark { public static final String Z_FIELD_NAME = "z"; public static final Schema SCHEMA = Schema.recordOf("face-landmark-record", - Schema.Field.of(TYPE_FIELD_NAME, Schema.of(Schema.Type.STRING)), + // The following field is not a simple type as it is defined as an Enum + Schema.Field.of(TYPE_FIELD_NAME, Schema.enumWith(FaceAnnotationSchema.getIterableFromEnum( + FaceAnnotation.Landmark.Type.values()))), Schema.Field.of(X_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(Y_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(Z_FIELD_NAME, Schema.of(Schema.Type.FLOAT))); } + + /** + * Helper function to build a {@link List} from an array. + * + * @param input Array of type T. + * @param Type of the input array. + * @return {@link List} + */ + protected static List getIterableFromEnum(T[] input) { + ArrayList list = new ArrayList<>(input.length); + for (T value : input) { + list.add(value.toString()); + } + return list; + } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java old mode 100644 new mode 100755 index 3a7aef2..8877e4c --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/FullTextAnnotationSchema.java @@ -43,7 +43,7 @@ private FullTextAnnotationSchema() { public static final String PAGES_FIELD_NAME = "pages"; public static final Schema SCHEMA = Schema.recordOf( - "document-text-annotation-component-record", + "fullTextAnnotation", Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(PAGES_FIELD_NAME, Schema.arrayOf(TextPage.SCHEMA))); @@ -85,7 +85,7 @@ public static class TextSymbol { * 2----3 * | | * 1----0 - * and the vertice order will still be (0, 1, 2, 3). + * and the vertex order will still be (0, 1, 2, 3). */ public static final String BOUNDING_BOX_FIELD_NAME = "boundingBox"; @@ -93,9 +93,11 @@ public static class TextSymbol { "document-text-page-symbol-record", Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), - Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))), + Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + DetectedLanguage.getSchema("textSymbol-detectedLanguages")))), Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))), - Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA))); + Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf( + VertexSchema.getSchema("textSymbol-boundingBox")))); } /** @@ -150,9 +152,11 @@ public static class TextWord { Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(SYMBOLS_FIELD_NAME, Schema.arrayOf(TextSymbol.SCHEMA)), - Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))), + Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + DetectedLanguage.getSchema("textWord-detectedLanguage")))), Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))), - Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA))); + Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf( + VertexSchema.getSchema("textWord-boundingBox")))); } /** @@ -198,7 +202,7 @@ public static class TextParagraph { * 2----3 * | | * 1----0 - * and the vertice order will still be (0, 1, 2, 3). + * and the vertex order will still be (0, 1, 2, 3). */ public static final String BOUNDING_BOX_FIELD_NAME = "boundingBox"; @@ -207,9 +211,11 @@ public static class TextParagraph { Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(WORDS_FIELD_NAME, Schema.arrayOf(TextWord.SCHEMA)), - Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))), + Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + DetectedLanguage.getSchema("textParagraph-detectedLanguages")))), Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))), - Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA))); + Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf( + VertexSchema.getSchema("textParagraph-boundingBox")))); } /** @@ -260,7 +266,7 @@ public static class TextBlock { * 2----3 * | | * 1----0 - * and the vertice order will still be (0, 1, 2, 3). + * and the vertex order will still be (0, 1, 2, 3). */ public static final String BOUNDING_BOX_FIELD_NAME = "boundingBox"; @@ -270,9 +276,11 @@ public static class TextBlock { Schema.Field.of(BLOCK_TYPE_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), Schema.Field.of(PARAGRAPHS_FIELD_NAME, Schema.arrayOf(TextParagraph.SCHEMA)), - Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))), + Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + DetectedLanguage.getSchema("textBlock-detectedLanguages")))), Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))), - Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA))); + Schema.Field.of(BOUNDING_BOX_FIELD_NAME, Schema.arrayOf( + VertexSchema.getSchema("textBlock-boundingBox")))); } /** @@ -305,6 +313,11 @@ public static class TextPage { */ public static final String BLOCKS_FIELD_NAME = "blocks"; + /** + * Property section that contains detected languages. + */ + public static final String PROPERTY_FIELD_NAME = "property"; + /** * A list of detected languages together with confidence. */ @@ -318,11 +331,14 @@ public static class TextPage { public static final Schema SCHEMA = Schema.recordOf( "document-text-page-record", Schema.Field.of(TEXT_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(WIDTH_FIELD_NAME, Schema.of(Schema.Type.INT)), - Schema.Field.of(HEIGHT_FIELD_NAME, Schema.of(Schema.Type.INT)), + Schema.Field.of(WIDTH_FIELD_NAME, Schema.of(Schema.Type.INT)), // + Schema.Field.of(HEIGHT_FIELD_NAME, Schema.of(Schema.Type.INT)), // Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), - Schema.Field.of(BLOCKS_FIELD_NAME, Schema.arrayOf(TextBlock.SCHEMA)), - Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf(DetectedLanguage.SCHEMA))), + Schema.Field.of(BLOCKS_FIELD_NAME, Schema.arrayOf(TextBlock.SCHEMA)), // + Schema.Field.of(PROPERTY_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + DetectedLanguage.getSchema("textPage-property")))), + Schema.Field.of(DETECTED_LANGUAGES_FIELD_NAME, Schema.nullableOf(Schema.arrayOf( + DetectedLanguage.getSchema("textPage-detectedLanguages")))), Schema.Field.of(DETECTED_BREAK_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING)))); } @@ -333,19 +349,28 @@ public static class TextPage { public static class DetectedLanguage { /** - * The BCP-47 language code, such as "en-US" or "sr-Latn". For more information, see + * The BCP-47 language code, such as "en-US" or "sr-Latin". For more information, see * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. */ - public static final String CODE_FIELD_NAME = "code"; + public static final String LANGUAGE_CODE_FIELD_NAME = "languageCode"; /** * Confidence of detected language. Range [0, 1]. */ public static final String CONFIDENCE_FIELD_NAME = "confidence"; - public static final Schema SCHEMA = Schema.recordOf( - "detected-language-record", - Schema.Field.of(CODE_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT))); + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ + public static Schema getSchema(String name) { + return Schema.recordOf( + name, + Schema.Field.of(LANGUAGE_CODE_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(CONFIDENCE_FIELD_NAME, Schema.of(Schema.Type.FLOAT))); + } } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java old mode 100644 new mode 100755 index 26b7b7c..0dbab7d --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/LocalizedObjectAnnotationSchema.java @@ -54,12 +54,19 @@ protected LocalizedObjectAnnotationSchema() { */ public static final String POSITION_FIELD_NAME = "position"; + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ public static final Schema SCHEMA = Schema.recordOf( "localized-object-annotation-component-record", Schema.Field.of(MID_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(LANGUAGE_CODE_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), - Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)) + Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.getSchema("localizedObjectAnnotation-position"))) ); } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java old mode 100644 new mode 100755 index 026024e..dd8446b --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/ProductSearchResultsSchema.java @@ -46,9 +46,9 @@ private ProductSearchResultsSchema() { */ public static final String GROUPED_RESULTS_FIELD_NAME = "productGroupedResults"; - public static final Schema SCHEMA = Schema.recordOf("product-search-result-record", + public static final Schema SCHEMA = Schema.recordOf("productSearch-resultRecord", Schema.Field.of(INDEX_TIME_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))), - Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.SCHEMA)), + Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.getSchema("productSearch-result"))), Schema.Field.of(GROUPED_RESULTS_FIELD_NAME, Schema.arrayOf(GroupedResult.SCHEMA)) ); @@ -72,11 +72,20 @@ public static class Result { */ public static final String PRODUCT_FIELD_NAME = "product"; - public static final Schema SCHEMA = Schema.recordOf("result-record", - Schema.Field.of(IMAGE_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), - Schema.Field.of(PRODUCT_FIELD_NAME, Product.SCHEMA) - ); + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ + public static Schema getSchema(String name) { + return Schema.recordOf(name, + Schema.Field.of(IMAGE_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), + Schema.Field.of(PRODUCT_FIELD_NAME, Product.getSchema(name + "result-product")) + ); + } } /** @@ -111,13 +120,24 @@ public static class Product { */ public static final String PRODUCT_LABELS_FIELD_NAME = "productLabels"; - public static final Schema SCHEMA = Schema.recordOf("product-record", - Schema.Field.of(NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(DISPLAY_NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(DESCRIPTION_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(PRODUCT_CATEGORY_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(PRODUCT_LABELS_FIELD_NAME, Schema.arrayOf(KeyValue.SCHEMA)) - ); + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ + public static Schema getSchema(String name) { + return Schema.recordOf(name, + Schema.Field.of(NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(DISPLAY_NAME_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(DESCRIPTION_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(PRODUCT_CATEGORY_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(PRODUCT_LABELS_FIELD_NAME, Schema.arrayOf( + KeyValue.getSchema(name + "-keyValue"))) + ); + } + } /** @@ -135,10 +155,19 @@ public static class KeyValue { */ public static final String VALUE_FIELD_NAME = "value"; - public static final Schema SCHEMA = Schema.recordOf("key-value-record", - Schema.Field.of(KEY_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(VALUE_FIELD_NAME, Schema.of(Schema.Type.STRING)) - ); + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ + public static Schema getSchema(String name) { + return Schema.recordOf(name, + Schema.Field.of(KEY_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(VALUE_FIELD_NAME, Schema.of(Schema.Type.STRING)) + ); + } } /** @@ -156,9 +185,10 @@ public static class GroupedResult { */ public static final String RESULTS_FIELD_NAME = "results"; - public static final Schema SCHEMA = Schema.recordOf("grouped-result-record", - Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA)), - Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.SCHEMA)) + public static final Schema SCHEMA = Schema.recordOf("groupedResult-record", + Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf( + VertexSchema.getSchema("groupedResult-position"))), + Schema.Field.of(RESULTS_FIELD_NAME, Schema.arrayOf(Result.getSchema("groupedResult-results"))) ); } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/SafeSearchAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/SafeSearchAnnotationSchema.java old mode 100644 new mode 100755 diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java old mode 100644 new mode 100755 index ac518dc..f5b15f9 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/TextAnnotationSchema.java @@ -23,6 +23,7 @@ */ public class TextAnnotationSchema { + private TextAnnotationSchema() { throw new AssertionError("Should not instantiate static utility class."); } @@ -47,5 +48,6 @@ private TextAnnotationSchema() { "text-annotation-component-record", Schema.Field.of(LOCALE_FIELD_NAME, Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of(DESCRIPTION_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.SCHEMA))); + Schema.Field.of(POSITION_FIELD_NAME, Schema.arrayOf(VertexSchema.getSchema("textAnnotation-position"))) + ); } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java old mode 100644 new mode 100755 index c441735..ef5ec78 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/VertexSchema.java @@ -19,11 +19,14 @@ import io.cdap.cdap.api.data.schema.Schema; /** - * A vertex represents a 2D point in the image. {@link com.google.cloud.vision.v1.Vertex} mapped to a record with + * A vertex represents a 2D point in the image. {@link com.google.cloud.vision.v1.Vertex} mapped to a record with the * following fields. */ public class VertexSchema { + /** + * Prevent instantiating this class. + */ private VertexSchema() { throw new AssertionError("Should not instantiate static utility class."); } @@ -38,7 +41,16 @@ private VertexSchema() { */ public static final String Y_FIELD_NAME = "y"; - public static final Schema SCHEMA = Schema.recordOf("vertex-record", - Schema.Field.of(X_FIELD_NAME, Schema.of(Schema.Type.INT)), - Schema.Field.of(Y_FIELD_NAME, Schema.of(Schema.Type.INT))); + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ + public static Schema getSchema(String name) { + return Schema.recordOf(name, + Schema.Field.of(X_FIELD_NAME, Schema.of(Schema.Type.INT)), + Schema.Field.of(Y_FIELD_NAME, Schema.of(Schema.Type.INT))); + } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java old mode 100644 new mode 100755 index d68b015..c3c8be5 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/WebDetectionSchema.java @@ -61,10 +61,10 @@ private WebDetectionSchema() { public static final Schema SCHEMA = Schema.recordOf( "web-detection-record", Schema.Field.of(ENTITIES_FIELD_NAME, Schema.arrayOf(WebEntity.SCHEMA)), - Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)), - Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)), + Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("fmiWebImage"))), + Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("pmiWebImage"))), Schema.Field.of(PAGES_WITH_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebPage.SCHEMA)), - Schema.Field.of(VISUALLY_SIMILAR_IMAGES, Schema.arrayOf(WebImage.SCHEMA)), + Schema.Field.of(VISUALLY_SIMILAR_IMAGES, Schema.arrayOf(WebImage.getSchema("vsiWebImage"))), Schema.Field.of(BEST_GUESS_LABELS_FIELD_NAME, Schema.arrayOf(BestGuessLabel.SCHEMA)) ); @@ -132,8 +132,8 @@ public static class WebPage { Schema.Field.of(URL_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(PAGE_TITLE_FIELD_NAME, Schema.of(Schema.Type.STRING)), Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)), - Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)), - Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.SCHEMA)) + Schema.Field.of(FULL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("webPage-fmiWebImage"))), + Schema.Field.of(PARTIAL_MATCHING_IMAGES_FIELD_NAME, Schema.arrayOf(WebImage.getSchema("webPage-pmiWebImage"))) ); } @@ -148,7 +148,7 @@ public static class BestGuessLabel { public static final String LABEL_FIELD_NAME = "label"; /** - * The BCP-47 language code, such as "en-US" or "sr-Latn". For more information, see + * The BCP-47 language code, such as "en-US" or "sr-Latin". For more information, see * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. */ public static final String LANGUAGE_CODE_FIELD_NAME = "languageCode"; @@ -175,10 +175,18 @@ public static class WebImage { */ public static final String SCORE_FIELD_NAME = "score"; - public static final Schema SCHEMA = Schema.recordOf( - "web-image-record", - Schema.Field.of(URL_FIELD_NAME, Schema.of(Schema.Type.STRING)), - Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)) - ); + /** + * Utility method to create a {@link Schema} with a specific name. This is useful to create uniquely named schemas + * that will be combined into a larger {@link Schema}. + * + * @param name {@link String} containing the name to give to the returned {@link Schema}. + * @return a {@link Schema} with the given name. + */ + public static Schema getSchema(String name) { + return Schema.recordOf(name, + Schema.Field.of(URL_FIELD_NAME, Schema.of(Schema.Type.STRING)), + Schema.Field.of(SCORE_FIELD_NAME, Schema.of(Schema.Type.FLOAT)) + ); + } } } diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java new file mode 100644 index 0000000..7dcaba8 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Contains classes to implement the Schemas used by the Actions and Transformers using the Cloud Vision API. + */ +package io.cdap.plugin.cloud.vision.transform.schema; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index eb579ea..37d9d14 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformer.java @@ -35,8 +35,16 @@ public CropHintsAnnotationsToRecordTransformer(Schema schema, String outputField super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link CropHintAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override - public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { + public StructuredRecord transform(StructuredRecord input, + AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) .set(outputFieldName, extractCropHintsAnnotations(annotateImageResponse)) .build(); @@ -48,9 +56,16 @@ private List extractCropHintsAnnotations(AnnotateImageResponse .collect(Collectors.toList()); } + /** + * Extract a {@link StructuredRecord} from a {@link CropHint} passed in. + * + * @param hint Contains the {@link CropHint} information to use. + * @return {@link StructuredRecord} that contains the data mapped to the {@link Schema}. + */ private StructuredRecord extractCropHintRecord(CropHint hint) { Schema hintSchema = getCropHintsAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(hintSchema); + Schema.Field positionField = hintSchema.getField(CropHintAnnotationSchema.POSITION_FIELD_NAME); if (positionField != null) { Schema positionSchema = getComponentSchema(positionField); @@ -74,7 +89,7 @@ private StructuredRecord extractCropHintRecord(CropHint hint) { * Retrieves Crop Hints Annotation's non-nullable component schema. Crop Hints Annotation's schema is retrieved * instead of using constant schema since users are free to choose to not include some of the fields. * - * @return Crop Hints Annotation's non-nullable component schema. + * @return Crop Hints Annotation's non-nullable component {@link Schema}. */ private Schema getCropHintsAnnotationSchema() { Schema.Field cropHintsAnnotationsField = schema.getField(outputFieldName); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index 4373fbe..e1d4ad6 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformer.java @@ -35,19 +35,39 @@ public FaceAnnotationsToRecordTransformer(Schema schema, String outputFieldName) super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link FaceAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { - return getOutputRecordBuilder(input) - .set(outputFieldName, extractFaceAnnotations(annotateImageResponse)) - .build(); + StructuredRecord.Builder builder = getOutputRecordBuilder(input); + List extracted = extractFaceAnnotations(annotateImageResponse); + return builder.set(outputFieldName, extracted).build(); } + /** + * Extract a complete {@link List} of {@link StructuredRecord} objects mapped from the {@link AnnotateImageResponse} + * passed in, using the {@link FaceAnnotationSchema}. + * + * @param annotateImageResponse Input {@link AnnotateImageResponse} object to get the data from. + * @return {@link List} containing the {@link StructuredRecord} mapped from the input. + */ private List extractFaceAnnotations(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getFaceAnnotationsList().stream() .map(this::extractFaceAnnotationRecord) .collect(Collectors.toList()); } + /** + * Extract a {@link StructuredRecord} from a {@link FaceAnnotation} input. + * + * @param annotation The {@link FaceAnnotation} object to get the data from. + * @return {@link StructuredRecord} containing the data mapped to the {@link Schema}. + */ private StructuredRecord extractFaceAnnotationRecord(FaceAnnotation annotation) { Schema faceSchema = getFaceAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(faceSchema); @@ -90,21 +110,21 @@ private StructuredRecord extractFaceAnnotationRecord(FaceAnnotation annotation) String surprise = annotation.getSurpriseLikelihood().name(); builder.set(FaceAnnotationSchema.SURPRISE_FIELD_NAME, surprise); } - Schema.Field positionField = faceSchema.getField(FaceAnnotationSchema.POSITION_FIELD_NAME); + Schema.Field positionField = faceSchema.getField(FaceAnnotationSchema.BOUNDING_POLY_NAME); if (positionField != null) { Schema positionSchema = getComponentSchema(positionField); List position = annotation.getBoundingPoly().getVerticesList().stream() .map(v -> extractVertex(v, positionSchema)) .collect(Collectors.toList()); - builder.set(FaceAnnotationSchema.POSITION_FIELD_NAME, position); + builder.set(FaceAnnotationSchema.BOUNDING_POLY_NAME, position); } - Schema.Field fdPositionField = faceSchema.getField(FaceAnnotationSchema.FD_POSITION_FIELD_NAME); + Schema.Field fdPositionField = faceSchema.getField(FaceAnnotationSchema.FD_BOUNDING_POLY_NAME); if (fdPositionField != null) { Schema positionSchema = getComponentSchema(fdPositionField); List position = annotation.getFdBoundingPoly().getVerticesList().stream() .map(v -> extractVertex(v, positionSchema)) .collect(Collectors.toList()); - builder.set(FaceAnnotationSchema.FD_POSITION_FIELD_NAME, position); + builder.set(FaceAnnotationSchema.FD_BOUNDING_POLY_NAME, position); } Schema.Field landmarksField = faceSchema.getField(FaceAnnotationSchema.LANDMARKS_FIELD_NAME); if (landmarksField != null) { @@ -118,6 +138,13 @@ private StructuredRecord extractFaceAnnotationRecord(FaceAnnotation annotation) return builder.build(); } + /** + * Extract a {@link StructuredRecord} from a {@link FaceAnnotation} input. + * + * @param landmark The {@link FaceAnnotation.Landmark} object to get the data from. + * @param schema The {@link Schema} to use for the mapping. + * @return {@link StructuredRecord} containing the data mapped to the {@link Schema}. + */ private StructuredRecord extractLandmark(FaceAnnotation.Landmark landmark, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(FaceAnnotationSchema.FaceLandmark.TYPE_FIELD_NAME) != null) { diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index efd371b..8378c5e --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformer.java @@ -40,6 +40,13 @@ public FullTextAnnotationsToRecordTransformer(Schema schema, String outputFieldN super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link FullTextAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { TextAnnotation annotation = annotateImageResponse.getFullTextAnnotation(); @@ -48,6 +55,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link StructuredRecord} containing the handwriting information from a {@link TextAnnotation} input + * using a {@link io.cdap.plugin.cloud.vision.transform.schema.TextAnnotationSchema}. + * + * @param annotation A {@link TextAnnotation} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractHandwritingAnnotation(TextAnnotation annotation) { Schema hwSchema = getHandwritingAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(hwSchema); @@ -67,6 +81,14 @@ private StructuredRecord extractHandwritingAnnotation(TextAnnotation annotation) return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the page information from a {@link Page} input + * using a {@link Schema} for the mapping. + * + * @param page The {@link Page} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractPage(Page page, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(FullTextAnnotationSchema.TextPage.TEXT_FIELD_NAME) != null) { @@ -114,6 +136,14 @@ private StructuredRecord extractPage(Page page, Schema schema) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the block information from a {@link Block} input + * using a {@link Schema} for the mapping. + * + * @param block The {@link Block} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractBlock(Block block, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(FullTextAnnotationSchema.TextBlock.TEXT_FIELD_NAME) != null) { @@ -164,6 +194,14 @@ private StructuredRecord extractBlock(Block block, Schema schema) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the paragraph information from a {@link Paragraph} input + * using a {@link Schema} for the mapping. + * + * @param paragraph The {@link Paragraph} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractParagraph(Paragraph paragraph, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(FullTextAnnotationSchema.TextParagraph.TEXT_FIELD_NAME) != null) { @@ -209,6 +247,14 @@ private StructuredRecord extractParagraph(Paragraph paragraph, Schema schema) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the paragraph information from a {@link Word} input + * using a {@link Schema} for the mapping. + * + * @param word The {@link Word} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractWord(Word word, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(FullTextAnnotationSchema.TextWord.TEXT_FIELD_NAME) != null) { @@ -252,6 +298,14 @@ private StructuredRecord extractWord(Word word, Schema schema) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the paragraph information from a {@link Symbol} input + * using a {@link Schema} for the mapping. + * + * @param symbol The {@link Symbol} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractSymbol(Symbol symbol, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(FullTextAnnotationSchema.TextSymbol.TEXT_FIELD_NAME) != null) { @@ -284,10 +338,18 @@ private StructuredRecord extractSymbol(Symbol symbol, Schema schema) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the detected language information from a + * {@link TextAnnotation.DetectedLanguage} input using a {@link Schema} for the mapping. + * + * @param language The {@link Symbol} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractDetectedLanguage(TextAnnotation.DetectedLanguage language, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); - if (schema.getField(FullTextAnnotationSchema.DetectedLanguage.CODE_FIELD_NAME) != null) { - builder.set(FullTextAnnotationSchema.DetectedLanguage.CODE_FIELD_NAME, language.getLanguageCode()); + if (schema.getField(FullTextAnnotationSchema.DetectedLanguage.LANGUAGE_CODE_FIELD_NAME) != null) { + builder.set(FullTextAnnotationSchema.DetectedLanguage.LANGUAGE_CODE_FIELD_NAME, language.getLanguageCode()); } if (schema.getField(FullTextAnnotationSchema.DetectedLanguage.CONFIDENCE_FIELD_NAME) != null) { builder.set(FullTextAnnotationSchema.DetectedLanguage.CONFIDENCE_FIELD_NAME, language.getConfidence()); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java old mode 100644 new mode 100755 index c1fb7a8..f494049 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImageAnnotationToRecordTransformer.java @@ -71,18 +71,29 @@ public ImageAnnotationToRecordTransformer(Schema schema, String outputFieldName) */ protected StructuredRecord.Builder getOutputRecordBuilder(StructuredRecord input) { Schema inputRecordSchema = input.getSchema(); + // schema is the output schema StructuredRecord.Builder outputRecordBuilder = StructuredRecord.builder(schema); - for (Schema.Field field : schema.getFields()) { - if (inputRecordSchema.getField(field.getName()) == null) { - continue; + // Loop through the input fields and copy them to the output + for (Schema.Field inputField : inputRecordSchema.getFields()) { + if (schema.getField(inputField.getName()) == null) { + continue; // Not found at the output } + // copy input record field values - outputRecordBuilder.set(field.getName(), input.get(field.getName())); + outputRecordBuilder.set(inputField.getName(), input.get(inputField.getName())); } return outputRecordBuilder; } + /** + * Extract a {@link StructuredRecord} containing the vertex information from a + * {@link Vertex} input using a {@link Schema} for the mapping. + * + * @param vertex The {@link Vertex} object containing the data. + * @param schema The {@link Schema} to use for the mapping of the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ protected StructuredRecord extractVertex(Vertex vertex, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(VertexSchema.X_FIELD_NAME) != null) { diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index 4a4b74f..9cee518 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformer.java @@ -35,6 +35,13 @@ public ImagePropertiesAnnotationsToRecordTransformer(Schema schema, String outpu super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link ColorInfoSchema}. This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) @@ -42,12 +49,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link StructuredRecord} containing the dominant colors information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private List extractDominantColors(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getImagePropertiesAnnotation().getDominantColors().getColorsList().stream() .map(this::extractColorInfoRecord) .collect(Collectors.toList()); } + /** + * Extract a {@link StructuredRecord} containing the color information from a + * {@link ColorInfo} input using a {@link Schema} for the mapping. + * + * @param colorInfo The {@link ColorInfo} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractColorInfoRecord(ColorInfo colorInfo) { Schema faceSchema = getColorInfoSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(faceSchema); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index 2d32da3..94ced78 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformer.java @@ -37,6 +37,14 @@ public LabelAnnotationsToRecordTransformer(Schema schema, String outputFieldName super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}. + * This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) @@ -44,12 +52,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link List} of {@link StructuredRecord} containing the label information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private List extractLabelAnnotations(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getLabelAnnotationsList().stream() .map(this::extractAnnotation) .collect(Collectors.toList()); } + /** + * Extract a {@link List} of {@link StructuredRecord} containing the entity annotation information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotation The {@link EntityAnnotation} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ protected StructuredRecord extractAnnotation(EntityAnnotation annotation) { Schema labelSchema = getEntityAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(labelSchema); @@ -88,6 +110,14 @@ protected StructuredRecord extractAnnotation(EntityAnnotation annotation) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the location information from a + * {@link LocationInfo} input using a {@link Schema} for the mapping. + * + * @param locationInfo The {@link LocationInfo} object containing the data. + * @param schema The {@link Schema} to use for the mapping. + * @return A {@link StructuredRecord} containing the data mapped. + */ protected StructuredRecord extractLocation(LocationInfo locationInfo, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(EntityAnnotationSchema.LocationInfo.LATITUDE_FIELD_NAME) != null) { @@ -102,6 +132,14 @@ protected StructuredRecord extractLocation(LocationInfo locationInfo, Schema sch return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the property information from a + * {@link Property} input using a {@link Schema} for the mapping. + * + * @param property The {@link Property} object containing the data. + * @param schema The {@link Schema} to use for the mapping. + * @return A {@link StructuredRecord} containing the data mapped. + */ protected StructuredRecord extractProperty(Property property, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(EntityAnnotationSchema.Property.NAME_FIELD_NAME) != null) { diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index 76f08e4..a3c390a --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformer.java @@ -36,18 +36,40 @@ public LandmarkAnnotationsToRecordTransformer(Schema schema, String outputFieldN } @Override + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}. + * This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) .set(outputFieldName, extractLandmarkAnnotations(annotateImageResponse)) .build(); } + /** + * Extract a {@link List} of {@link StructuredRecord} containing the landmark information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private List extractLandmarkAnnotations(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getLandmarkAnnotationsList().stream() .map(this::extractAnnotation) .collect(Collectors.toList()); } + /** + * Extract a {@link StructuredRecord} containing the entity information from a + * {@link EntityAnnotation} input using a {@link Schema} for the mapping. + * + * @param annotation The {@link EntityAnnotation} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ @Override protected StructuredRecord extractAnnotation(EntityAnnotation annotation) { Schema landmarkSchema = getEntityAnnotationSchema(); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index 610a303..faf7d25 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformer.java @@ -35,6 +35,14 @@ public LocalizedObjectAnnotationsToRecordTransformer(Schema schema, String outpu super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}. + * This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) @@ -42,12 +50,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link List} of {@link StructuredRecord} containing the localized object information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private List extractLocalizedObjectAnnotations(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getLocalizedObjectAnnotationsList().stream() .map(this::extractLocalizedObjectAnnotationRecord) .collect(Collectors.toList()); } + /** + * Extract a {@link StructuredRecord} containing the localized object information from a + * {@link LocalizedObjectAnnotation} input using a {@link Schema} for the mapping. + * + * @param annotation The {@link LocalizedObjectAnnotation} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractLocalizedObjectAnnotationRecord(LocalizedObjectAnnotation annotation) { Schema objSchema = getLocalizedObjectAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(objSchema); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index 9888050..b9a4c26 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformer.java @@ -19,7 +19,6 @@ import com.google.cloud.vision.v1.AnnotateImageResponse; import io.cdap.cdap.api.data.format.StructuredRecord; import io.cdap.cdap.api.data.schema.Schema; - import java.util.List; import java.util.stream.Collectors; @@ -34,6 +33,14 @@ public LogoAnnotationsToRecordTransformer(Schema schema, String outputFieldName) super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link io.cdap.plugin.cloud.vision.transform.schema.LocalizedObjectAnnotationSchema}. + * This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) @@ -41,6 +48,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link List} of {@link StructuredRecord} containing the logo information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private List extractLogoAnnotations(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getLogoAnnotationsList().stream() .map(this::extractAnnotation) diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java old mode 100644 new mode 100755 index 69f335d..f8e69e0 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformer.java @@ -37,6 +37,14 @@ public ProductSearchResultToRecordTransformer(Schema schema, String outputFieldN super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link ProductSearchResultsSchema}. This {@link StructuredRecord} can then be turned into + * a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { ProductSearchResults productSearchResults = annotateImageResponse.getProductSearchResults(); @@ -45,6 +53,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link StructuredRecord} containing the product search information from a + * {@link ProductSearchResults} input using a {@link Schema} for the mapping. + * + * @param searchResults The {@link ProductSearchResults} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractProductSearchResults(ProductSearchResults searchResults) { Schema schema = getProductSearchResultSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(schema); @@ -74,6 +89,13 @@ private StructuredRecord extractProductSearchResults(ProductSearchResults search return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the search result information from a + * {@link ProductSearchResults.Result} input using a {@link Schema} for the mapping. + * + * @param result The {@link ProductSearchResults.Result} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractProductSearchResultRecord(ProductSearchResults.Result result, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(ProductSearchResultsSchema.Result.IMAGE_FIELD_NAME) != null) { @@ -93,6 +115,14 @@ private StructuredRecord extractProductSearchResultRecord(ProductSearchResults.R return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the product information from a + * {@link Product} input using a {@link Schema} for the mapping. + * + * @param product The {@link Product} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractProductRecord(Product product, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(ProductSearchResultsSchema.Product.NAME_FIELD_NAME) != null) { @@ -119,6 +149,14 @@ private StructuredRecord extractProductRecord(Product product, Schema schema) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the label information from a + * {@link Product.KeyValue} input using a {@link Schema} for the mapping. + * + * @param label The {@link Product.KeyValue} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractProductLabelRecord(Product.KeyValue label, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(ProductSearchResultsSchema.KeyValue.KEY_FIELD_NAME) != null) { @@ -130,6 +168,14 @@ private StructuredRecord extractProductLabelRecord(Product.KeyValue label, Schem return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the grouped result information from a + * {@link Product.KeyValue} input using a {@link Schema} for the mapping. + * + * @param result The {@link ProductSearchResults.GroupedResult} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractProductSearchResultRecord(ProductSearchResults.GroupedResult result, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); Schema.Field positionField = schema.getField(ProductSearchResultsSchema.GroupedResult.POSITION_FIELD_NAME); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index be1b1d4..469bb25 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformer.java @@ -33,6 +33,14 @@ public SafeSearchAnnotationsToRecordTransformer(Schema schema, String outputFiel super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link SafeSearchAnnotationSchema}. This {@link StructuredRecord} can then be turned into + * a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { SafeSearchAnnotation annotation = annotateImageResponse.getSafeSearchAnnotation(); @@ -41,6 +49,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link StructuredRecord} containing the safe search information from a + * {@link SafeSearchAnnotation} input using a {@link Schema} for the mapping. + * + * @param annotation The {@link SafeSearchAnnotation} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractSafeSearchAnnotation(SafeSearchAnnotation annotation) { Schema safeSearchAnnotationSchema = getSafeSearchAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(safeSearchAnnotationSchema); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java old mode 100644 new mode 100755 index fbefeaa..219212b --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformer.java @@ -35,6 +35,13 @@ public TextAnnotationsToRecordTransformer(Schema schema, String outputFieldName) super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link TextAnnotationSchema}. This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { return getOutputRecordBuilder(input) @@ -42,12 +49,26 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link List} of {@link StructuredRecord} containing the text information from a + * {@link AnnotateImageResponse} input using a {@link Schema} for the mapping. + * + * @param annotateImageResponse The {@link AnnotateImageResponse} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private List extractTextAnnotations(AnnotateImageResponse annotateImageResponse) { return annotateImageResponse.getTextAnnotationsList().stream() .map(this::extractTextAnnotationRecord) .collect(Collectors.toList()); } + /** + * Extract a {@link StructuredRecord} containing the text annotation information from a + * {@link EntityAnnotation} input using a {@link Schema} for the mapping. + * + * @param annotation The {@link EntityAnnotation} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractTextAnnotationRecord(EntityAnnotation annotation) { Schema textSchema = getTextAnnotationSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(textSchema); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java old mode 100644 new mode 100755 index 52cd1b9..0cc0a59 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/TransformerFactory.java @@ -19,8 +19,8 @@ import io.cdap.plugin.cloud.vision.transform.ImageFeature; /** - * A factory which creates instance of {@ImageAnnotationToRecordTransformer} in accordance to feature and output schema - * configured in input config. + * A factory which creates instances of {@ImageAnnotationToRecordTransformer} in accordance to the feature and output + * schema configured in input config. */ public class TransformerFactory { public static ImageAnnotationToRecordTransformer createInstance(ImageFeature imageFeature, String outputFieldName, diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java old mode 100644 new mode 100755 index 82b41f1..d4fb683 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionToRecordTransformer.java @@ -24,7 +24,6 @@ import java.util.List; import java.util.stream.Collectors; - /** * Transforms web detection of specified {@link AnnotateImageResponse} to {@link StructuredRecord} according * to the specified schema. @@ -35,6 +34,13 @@ public WebDetectionToRecordTransformer(Schema schema, String outputFieldName) { super(schema, outputFieldName); } + /** + * Extract the entire mapping of a {@link AnnotateImageResponse} object to a {@link StructuredRecord} + * using the {@link WebDetectionSchema}. This {@link StructuredRecord} can then be turned into a json document. + * + * @param input {@link StructuredRecord} to add to. + * @param annotateImageResponse {@link AnnotateImageResponse} to get the data from. + */ @Override public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse annotateImageResponse) { WebDetection webDetection = annotateImageResponse.getWebDetection(); @@ -43,6 +49,13 @@ public StructuredRecord transform(StructuredRecord input, AnnotateImageResponse .build(); } + /** + * Extract a {@link StructuredRecord} containing the Web detection information from a + * {@link WebDetection} input using a {@link Schema} for the mapping. + * + * @param webDetection The {@link WebDetection} object containing the data. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractWebDetection(WebDetection webDetection) { Schema webSchema = getWebDetectionSchema(); StructuredRecord.Builder builder = StructuredRecord.builder(webSchema); @@ -103,6 +116,14 @@ private StructuredRecord extractWebDetection(WebDetection webDetection) { return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the Web entity information from a + * {@link WebDetection.WebEntity} input using a {@link Schema} for the mapping. + * + * @param webEntity The {@link WebDetection.WebEntity} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractEntity(WebDetection.WebEntity webEntity, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(WebDetectionSchema.WebEntity.ENTITY_ID_FIELD_NAME) != null) { @@ -118,6 +139,14 @@ private StructuredRecord extractEntity(WebDetection.WebEntity webEntity, Schema return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the Web image information from a + * {@link WebDetection.WebImage} input using a {@link Schema} for the mapping. + * + * @param webImage The {@link WebDetection.WebImage} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractWebImage(WebDetection.WebImage webImage, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(WebDetectionSchema.WebImage.URL_FIELD_NAME) != null) { @@ -130,6 +159,14 @@ private StructuredRecord extractWebImage(WebDetection.WebImage webImage, Schema return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the Web label information from a + * {@link WebDetection.WebLabel} input using a {@link Schema} for the mapping. + * + * @param webLabel The {@link WebDetection.WebLabel} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractWebLabel(WebDetection.WebLabel webLabel, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(WebDetectionSchema.BestGuessLabel.LABEL_FIELD_NAME) != null) { @@ -142,6 +179,14 @@ private StructuredRecord extractWebLabel(WebDetection.WebLabel webLabel, Schema return builder.build(); } + /** + * Extract a {@link StructuredRecord} containing the Web page information from a + * {@link WebDetection.WebPage} input using a {@link Schema} for the mapping. + * + * @param webPage The {@link WebDetection.WebPage} object containing the data. + * @param schema The {@link Schema} to use. + * @return A {@link StructuredRecord} containing the data mapped. + */ private StructuredRecord extractWebPage(WebDetection.WebPage webPage, Schema schema) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); if (schema.getField(WebDetectionSchema.WebPage.URL_FIELD_NAME) != null) { diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java new file mode 100644 index 0000000..744d2b7 --- /dev/null +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Classes that map answers from the Cloud Vision API to Records that can then be turned into json docs. + */ +package io.cdap.plugin.cloud.vision.transform.transformer; diff --git a/src/test/java/io/cdap/plugin/cloud/vision/CloudVisionConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/CloudVisionConfigBuilder.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java b/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java new file mode 100644 index 0000000..87b446a --- /dev/null +++ b/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java @@ -0,0 +1,74 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.cloud.vision; + +import com.google.auth.Credentials; +import com.google.cloud.storage.Blob; +import io.cdap.plugin.cloud.vision.action.GcsBucketHelper; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import java.util.List; + +/** + * Test that we can access blobs using a GCSPath and that the batching computation doesn't trigger + * an exception. + */ +public class GcsBucketHelperTest { + protected static final String PATH = System.getProperty("path", "gs://vision-api-pbo2/images"); + protected static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", "auto-detect"); + // This is a limit imposed by the Cloud Vision API + protected static final int MAX_NUMBER_OF_IMAGES_PER_BATCH = 2000; + + @Before + public void setUp() throws Exception { + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void getAllFilesInPath() throws Exception { + Credentials credentials = CredentialsHelper.getCredentials(SERVICE_ACCOUNT_FILE_PATH); + + List blobs = GcsBucketHelper.getAllFilesInPath(PATH, credentials); + if (blobs.isEmpty()) { + throw new Exception("Cannot get blobs in: " + PATH); + } + + System.out.println(); + System.out.println("Using service acount path: " + SERVICE_ACCOUNT_FILE_PATH); + System.out.println("Blobs found in: " + PATH); + for (Blob blob : blobs) { + System.out.println("blob.getName(): " + blob.getName()); + } + + int countBlobs = 0; + for (int batchId = 0; batchId < (1 + blobs.size() / MAX_NUMBER_OF_IMAGES_PER_BATCH); batchId++) { + for (int index = batchId * MAX_NUMBER_OF_IMAGES_PER_BATCH; + (index < (batchId + 1) * MAX_NUMBER_OF_IMAGES_PER_BATCH) && (index < blobs.size()); + index++) { + countBlobs++; + } + } + + // Make sure we have taken into the loop as many blobs as returned from GCS + Assert.assertEquals(countBlobs, blobs.size()); + } +} diff --git a/src/test/java/io/cdap/plugin/cloud/vision/ValidationAssertions.java b/src/test/java/io/cdap/plugin/cloud/vision/ValidationAssertions.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java old mode 100644 new mode 100755 index ad13ad8..1f87199 --- a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionConfigTest.java @@ -23,7 +23,6 @@ import io.cdap.plugin.cloud.vision.transform.ImageFeature; import org.junit.Assert; import org.junit.Test; - import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -122,6 +121,30 @@ public void testValidateAspectRatios() { assertValidationFailed(collector, paramNames); } + @Test + public void testValidateOneLanguageHint() { + // Those are all the language ids retrieved from 'widgets/DocumentExtractor-transform.json' + String[] languages = new String[]{"af", "sq", "ar", "hy", "be", "bn", "bg", "ca", "zh", "hr", "cs", "da", "nl", + "en", "et", "fil", "fi", "fr", "de", "el", "gu", "iw", "hu", "is", "id", "it", "ja", "kn", "km", "ko", "lo", + "lv", "lt", "mk", "ms", "ml", "mr", "ne", "no", "fa", "pl", "pt", "pa", "ro", "ru", "ru-PETR1708", "sr", + "sr-Latn", "sk", "sl", "es", "sv", "ta", "te", "th", "tr", "uk", "vi", "yi"}; + + MockFailureCollector collector = new MockFailureCollector(MOCK_STAGE); + List> paramNames = Collections.singletonList( + Collections.singletonList(ActionConstants.LANGUAGE_HINTS) + ); + + // Validate the languages one by one + for (String language : languages) { + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(VALID_CONFIG) + .setFeatures(ImageFeature.TEXT.getDisplayName()) + .setLanguageHints(language) + .build(); + config.validate(collector); + } + Assert.assertTrue(collector.getValidationFailures().isEmpty()); + } + private void assertValidationFailed(MockFailureCollector failureCollector, List> paramNames) { List failureList = failureCollector.getValidationFailures(); Assert.assertEquals(paramNames.size(), failureList.size()); diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java old mode 100644 new mode 100755 index cb6eafd..e1d81b6 --- a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java @@ -29,15 +29,15 @@ import io.cdap.cdap.etl.mock.action.MockActionContext; import io.cdap.plugin.cloud.vision.CloudVisionConstants; import io.cdap.plugin.cloud.vision.CredentialsHelper; -import io.cdap.plugin.cloud.vision.source.GCSPath; import io.cdap.plugin.cloud.vision.transform.ImageFeature; +import io.cdap.plugin.gcp.gcs.GCSPath; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; - import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.util.ArrayList; import javax.annotation.Nullable; /** @@ -46,64 +46,145 @@ public class OfflineImageExtractorActionTest { private static final String PROJECT = System.getProperty("project", CloudVisionConstants.AUTO_DETECT); - private static final String SERVICE_ACCOUNT_FILE_PATH = - System.getProperty("serviceFilePath", CloudVisionConstants.AUTO_DETECT); - private static final String PATH = System.getProperty("path", - "gs://cloud-vision-cdap-text-image-extractor-offline"); + + private static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", + CloudVisionConstants.AUTO_DETECT); + private static final String BUCKET_NAME = System.getProperty("path", "gs://cloud-vision-cdap-tests"); + private static final String IMAGE_PATH_IN_BUCKET = "images"; + private static final String PATH_PATTERN = "%s/%s/"; private static final String RESULT_PATH_PATTERN = "%s/%s"; private static final String JPG_CONTENT_TYPE = "image/jpeg"; - private static final String OBJECT_IMAGE_NAME = "multiple_objects.jpg"; - private static final String SAFE_IMAGE_NAME = "safe_search.jpg"; - private static final String CROP_HINTS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/crop_hints/bubble.jpeg"; - private static final String FACE_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/face/faces.jpeg"; - private static final String PROPERTIES_IMAGE_FILE_PATH = - "gs://cloud-samples-data/vision/image_properties/bali.jpeg"; - private static final String LABELS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/label/setagaya.jpeg"; - private static final String LANDMARKS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/landmark/st_basils.jpeg"; - private static final String LOGOS_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/logo/google_logo.jpg"; - private static final String OBJECTS_IMAGE_FILE_PATH = String.format(RESULT_PATH_PATTERN, PATH, OBJECT_IMAGE_NAME); - private static final String SAFE_SEARCH_IMAGE_FILE_PATH = String.format(RESULT_PATH_PATTERN, PATH, SAFE_IMAGE_NAME); - private static final String TEXT_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/ocr/sign.jpg"; - private static final String WEB_DETECTION_IMAGE_FILE_PATH = "gs://cloud-samples-data/vision/web/carnaval.jpeg"; + + private static final String LOCAL_IMAGE_FOLDER_PATH = "examples/images/jpg"; + + private static final String CROP_HINTS_IMAGE_NAME = "bubble.jpg"; + private static final String FACE_IMAGE_NAME = "faces.jpg"; + private static final String LABELS_IMAGE_NAME = "setagaya.jpg"; + private static final String LANDMARKS_IMAGE_NAME = "st_basils.jpg"; + private static final String LOGOS_IMAGE_NAME = "google_logo.jpg"; + private static final String OBJECT_IMAGE_NAME = "fallon-michael-8LKQfBumjMo.jpg"; + private static final String PROPERTIES_IMAGE_NAME = "bali.jpg"; + private static final String SAFE_SEARCH_IMAGE_NAME = "keyur-nandaniya-oEgiJNbYw8w.jpg"; + private static final String TEXT_IMAGE_NAME = "sign.jpg"; + private static final String WEB_DETECTION_IMAGE_NAME = "carnaval.jpg"; + + private static final String CROP_HINTS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + CROP_HINTS_IMAGE_NAME; + private static final String FACE_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + FACE_IMAGE_NAME; + private static final String LABELS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + LABELS_IMAGE_NAME; + private static final String LANDMARKS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + LANDMARKS_IMAGE_NAME; + private static final String LOGOS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + LOGOS_IMAGE_NAME; + private static final String OBJECTS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + OBJECT_IMAGE_NAME; + private static final String PROPERTIES_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + PROPERTIES_IMAGE_NAME; + private static final String SAFE_SEARCH_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + SAFE_SEARCH_IMAGE_NAME; + private static final String TEXT_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + "/" + + TEXT_IMAGE_NAME; + private static final String WEB_DETECTION_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + + "/" + WEB_DETECTION_IMAGE_NAME; + private static final String RESULT_FILE_NAME = "output-1-to-1.json"; private static final String BATCH_SIZE = "20"; private static final OfflineImageExtractorActionConfig CONFIG = new OfflineImageExtractorActionConfig( - SERVICE_ACCOUNT_FILE_PATH, - ImageFeature.FACE.getDisplayName(), - null, - null, - true, - null, - null, - BATCH_SIZE + SERVICE_ACCOUNT_FILE_PATH, + ImageFeature.FACE.getDisplayName(), + null, + null, + true, + null, + null, + BATCH_SIZE ); private static Storage storage; private static Bucket bucket; + private static final String FOLDER_CONTENT_TYPE = "application/x-www-form-urlencoded;charset=UTF-8"; + + private static void createFolder(String folderName) throws FileNotFoundException { + System.out.println("Creating folder: " + folderName); + FileInputStream fileInputStream = new FileInputStream("examples/empty_file.txt"); + bucket.create(folderName + "/", + fileInputStream, + FOLDER_CONTENT_TYPE, + Bucket.BlobWriteOption.doesNotExist()); + } + + private static void uploadImages() throws FileNotFoundException { + ArrayList allImages = new ArrayList<>(10); + allImages.add(CROP_HINTS_IMAGE_NAME); + allImages.add(FACE_IMAGE_NAME); + allImages.add(LABELS_IMAGE_NAME); + allImages.add(LANDMARKS_IMAGE_NAME); + allImages.add(LOGOS_IMAGE_NAME); + allImages.add(OBJECT_IMAGE_NAME); + allImages.add(PROPERTIES_IMAGE_NAME); + allImages.add(SAFE_SEARCH_IMAGE_NAME); + allImages.add(TEXT_IMAGE_NAME); + allImages.add(WEB_DETECTION_IMAGE_NAME); + + for (String imageName : allImages) { + String localImagePath = LOCAL_IMAGE_FOLDER_PATH + "/" + imageName; + + FileInputStream fileInputStream = new FileInputStream(localImagePath); + System.out.println("Uploading " + localImagePath + " to " + bucket.getName()); + bucket.create(IMAGE_PATH_IN_BUCKET + "/" + imageName, + fileInputStream, + JPG_CONTENT_TYPE, + Bucket.BlobWriteOption.doesNotExist()); + } + } + + private static Storage getStorage(String project, @Nullable Credentials credentials) { + StorageOptions.Builder builder = StorageOptions.newBuilder().setProjectId(project); + if (credentials != null) { + builder.setCredentials(credentials); + } + return builder.build().getService(); + } + + private static void deleteBucket(Storage storage, Bucket bucket) { + if (bucket == null || bucket.list() == null) { + return; + } + System.out.println("Deleting bucket: " + bucket.getName()); + for (Blob blob : bucket.list().iterateAll()) { + storage.delete(blob.getBlobId()); + } + + bucket.delete(Bucket.BucketSourceOption.metagenerationMatch()); + } + @Before public void testSetup() throws Exception { Credentials credentials = CredentialsHelper.getCredentials(SERVICE_ACCOUNT_FILE_PATH); String projectId = CredentialsHelper.getProjectId(PROJECT); storage = getStorage(projectId, credentials); - GCSPath path = GCSPath.from(PATH); + GCSPath path = GCSPath.from(BUCKET_NAME); String bucketName = path.getBucket(); bucket = storage.get(bucketName); if (bucket != null) { deleteBucket(storage, bucket); } + System.out.println("Creating bucket: " + bucketName); BucketInfo bucketInfo = BucketInfo.newBuilder(bucketName) - .setStorageClass(StorageClass.STANDARD) - .setLocation("us-central1") - .build(); + .setStorageClass(StorageClass.STANDARD) + .setLocation("us-east1") + .build(); bucket = storage.create(bucketInfo); - initTestsData(); + uploadImages(); } @After @@ -111,28 +192,20 @@ public void destroy() { deleteBucket(storage, bucket); } - private static void initTestsData() throws FileNotFoundException { - String path = String.format("src/test/resources/%s", OBJECT_IMAGE_NAME); - - FileInputStream serviceAccountStream = new FileInputStream(path); - bucket.create(OBJECT_IMAGE_NAME, serviceAccountStream, JPG_CONTENT_TYPE, Bucket.BlobWriteOption.doesNotExist()); - - path = String.format("src/test/resources/%s", SAFE_IMAGE_NAME); - - serviceAccountStream = new FileInputStream(path); - bucket.create(SAFE_IMAGE_NAME, serviceAccountStream, JPG_CONTENT_TYPE, Bucket.BlobWriteOption.doesNotExist()); - } - @Test public void testRunCropHintsFeature() throws Exception { String folder = "crop_hints"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunCropHintsFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(CROP_HINTS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.CROP_HINTS.getDisplayName()) - .setAspectRatios("1.66667") - .build(); + .setSourcePath(CROP_HINTS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.CROP_HINTS.getDisplayName()) + .setAspectRatios("1.66667") + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -144,11 +217,15 @@ public void testRunCropHintsFeature() throws Exception { @Test public void testRunFaceFeature() throws Exception { String folder = "face"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunFaceFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(FACE_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .build(); + .setSourcePath(FACE_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -160,12 +237,16 @@ public void testRunFaceFeature() throws Exception { @Test public void testRunImagePropertiesFeature() throws Exception { String folder = "properties"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunImagePropertiesFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(PROPERTIES_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.IMAGE_PROPERTIES.getDisplayName()) - .build(); + .setSourcePath(PROPERTIES_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.IMAGE_PROPERTIES.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -177,12 +258,16 @@ public void testRunImagePropertiesFeature() throws Exception { @Test public void testRunLabelsFeature() throws Exception { String folder = "labels"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunLabelsFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(LABELS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.LABELS.getDisplayName()) - .build(); + .setSourcePath(LABELS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.LABELS.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -194,12 +279,16 @@ public void testRunLabelsFeature() throws Exception { @Test public void testRunLandmarksFeature() throws Exception { String folder = "landmarks"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunLandmarksFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(LANDMARKS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.LANDMARKS.getDisplayName()) - .build(); + .setSourcePath(LANDMARKS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.LANDMARKS.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -211,12 +300,16 @@ public void testRunLandmarksFeature() throws Exception { @Test public void testRunLogosFeature() throws Exception { String folder = "logos"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + + createFolder(folder); + + System.out.println("testRunLogosFeature() - resultFolderPath: " + resultFolderPath); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(LOGOS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.LOGOS.getDisplayName()) - .build(); + .setSourcePath(LOGOS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.LOGOS.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -228,12 +321,16 @@ public void testRunLogosFeature() throws Exception { @Test public void testRunObjectLocalizationFeature() throws Exception { String folder = "objects"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunObjectLocalizationFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(OBJECTS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.OBJECT_LOCALIZATION.getDisplayName()) - .build(); + .setSourcePath(OBJECTS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.OBJECT_LOCALIZATION.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -245,13 +342,17 @@ public void testRunObjectLocalizationFeature() throws Exception { @Test public void testRunTextFeature() throws Exception { String folder = "text"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunTextFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(TEXT_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.TEXT.getDisplayName()) - .setLanguageHints("en") - .build(); + .setSourcePath(TEXT_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.TEXT.getDisplayName()) + .setLanguageHints("en") + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -263,12 +364,16 @@ public void testRunTextFeature() throws Exception { @Test public void testRunSafeSearchFeature() throws Exception { String folder = "safe"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunSafeSearchFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(SAFE_SEARCH_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.EXPLICIT_CONTENT.getDisplayName()) - .build(); + .setSourcePath(SAFE_SEARCH_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.EXPLICIT_CONTENT.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -280,12 +385,16 @@ public void testRunSafeSearchFeature() throws Exception { @Test public void testRunWebDetectionFeature() throws Exception { String folder = "web"; - String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); + String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + System.out.println("testRunSafeSearchFeature() - resultFolderPath: " + resultFolderPath); + + createFolder(folder); + OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(WEB_DETECTION_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.WEB_DETECTION.getDisplayName()) - .build(); + .setSourcePath(WEB_DETECTION_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.WEB_DETECTION.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -295,39 +404,25 @@ public void testRunWebDetectionFeature() throws Exception { } private void validateResult(String folder, String expectedUri) throws InterruptedException { - Thread.sleep(30000); - - Blob blob = bucket.get(String.format(RESULT_PATH_PATTERN, folder, RESULT_FILE_NAME)); + Thread.sleep(10000); // Wait a bit to make sure the blob is available in the bucket + String blobPath = String.format(RESULT_PATH_PATTERN, folder, RESULT_FILE_NAME); + System.out.println("validateResult() - blobPath: " + blobPath); + Blob blob = bucket.get(blobPath); Assert.assertTrue(blob.exists()); String content = new String(blob.getContent()); JsonParser parser = new JsonParser(); JsonElement jsonTree = parser.parse(content); String uri = jsonTree.getAsJsonObject() - .get("responses") - .getAsJsonArray() - .get(0) - .getAsJsonObject() - .get("context") - .getAsJsonObject() - .get("uri") - .getAsString(); - - Assert.assertEquals(expectedUri, uri); - } - - private static Storage getStorage(String project, @Nullable Credentials credentials) { - StorageOptions.Builder builder = StorageOptions.newBuilder().setProjectId(project); - if (credentials != null) { - builder.setCredentials(credentials); - } - return builder.build().getService(); - } - - private static void deleteBucket(Storage storage, Bucket bucket) { - for (Blob blob : bucket.list().iterateAll()) { - storage.delete(blob.getBlobId()); - } - bucket.delete(Bucket.BucketSourceOption.metagenerationMatch()); + .get("responses") + .getAsJsonArray() + .get(0) + .getAsJsonObject() + .get("context") + .getAsJsonObject() + .get("uri") + .getAsString(); + + Assert.assertTrue(expectedUri.equals(uri)); } } diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfigTest.java similarity index 79% rename from src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfigTest.java rename to src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfigTest.java index 498cb83..c54753d 100644 --- a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionConfigTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionConfigTest.java @@ -23,11 +23,11 @@ import org.junit.Test; /** - * Test class for {@link TextExtractorActionConfigTest}. + * Test class for {@link OfflineTextExtractorActionConfigTest}. */ -public class TextExtractorActionConfigTest { +public class OfflineTextExtractorActionConfigTest { private static final String MOCK_STAGE = "mockStage"; - private static final TextExtractorActionConfig VALID_CONFIG = new TextExtractorActionConfig( + private static final OfflineTextExtractorActionConfig VALID_CONFIG = new OfflineTextExtractorActionConfig( "/path", "/path", "/path", @@ -45,19 +45,19 @@ public void testValidConfig() { @Test public void testEmptyServiceFilePath() { - TextExtractorActionConfig config = TextExtractorActionConfig.builder(VALID_CONFIG) + OfflineTextExtractorActionConfig config = OfflineTextExtractorActionConfig.builder(VALID_CONFIG) .setServiceFilePath("") .build(); MockFailureCollector failureCollector = new MockFailureCollector(MOCK_STAGE); config.validate(failureCollector); ValidationAssertions.assertPropertyValidationFailed(failureCollector, - CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH); + CloudVisionConstants.SERVICE_ACCOUNT_FILE_PATH); } @Test public void testEmptySourcePath() { - TextExtractorActionConfig config = TextExtractorActionConfig.builder(VALID_CONFIG) + OfflineTextExtractorActionConfig config = OfflineTextExtractorActionConfig.builder(VALID_CONFIG) .setSourcePath("") .build(); @@ -68,7 +68,7 @@ public void testEmptySourcePath() { @Test public void testEmptyDestinationPath() { - TextExtractorActionConfig config = TextExtractorActionConfig.builder(VALID_CONFIG) + OfflineTextExtractorActionConfig config = OfflineTextExtractorActionConfig.builder(VALID_CONFIG) .setDestinationPath("") .build(); diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionTest.java similarity index 92% rename from src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionTest.java rename to src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionTest.java index 2cf1b52..9b4e62c 100644 --- a/src/test/java/io/cdap/plugin/cloud/vision/action/TextExtractorActionTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineTextExtractorActionTest.java @@ -28,20 +28,19 @@ import io.cdap.cdap.etl.api.action.ActionContext; import io.cdap.cdap.etl.mock.action.MockActionContext; import io.cdap.plugin.cloud.vision.CredentialsHelper; -import io.cdap.plugin.cloud.vision.source.GCSPath; +import io.cdap.plugin.gcp.gcs.GCSPath; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; - import java.io.FileInputStream; import java.io.FileNotFoundException; import javax.annotation.Nullable; /** - * Test class for {@link TextExtractorAction}. + * Test class for {@link OfflineTextExtractorAction}. */ -public class TextExtractorActionTest { +public class OfflineTextExtractorActionTest { protected static final String PROJECT = System.getProperty("project", "auto-detect"); protected static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", "auto-detect"); protected static final String PATH = System.getProperty("path", "gs://cloud-vision-cdap-text-offline"); @@ -94,7 +93,7 @@ private static void initTestsData() throws FileNotFoundException { @Test public void testRun() throws Exception { - TextExtractorActionConfig config = new TextExtractorActionConfig( + OfflineTextExtractorActionConfig config = new OfflineTextExtractorActionConfig( SERVICE_ACCOUNT_FILE_PATH, PDF_FILE_PATH, RESULT_FOLDER_PATH, @@ -103,7 +102,7 @@ public void testRun() throws Exception { null ); - TextExtractorAction action = new TextExtractorAction(config); + OfflineTextExtractorAction action = new OfflineTextExtractorAction(config); ActionContext context = new MockActionContext(); action.run(context); @@ -144,6 +143,9 @@ private static Storage getStorage(String project, @Nullable Credentials credenti } private static void deleteBucket(Storage storage, Bucket bucket) { + if (bucket == null || bucket.list() == null) { + return; + } for (Blob blob : bucket.list().iterateAll()) { storage.delete(blob.getBlobId()); } diff --git a/src/test/java/io/cdap/plugin/cloud/vision/package-info.java b/src/test/java/io/cdap/plugin/cloud/vision/package-info.java new file mode 100644 index 0000000..2d89acb --- /dev/null +++ b/src/test/java/io/cdap/plugin/cloud/vision/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright © 2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Contains JUnit tests. + */ +package io.cdap.plugin.cloud.vision; diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java old mode 100644 new mode 100755 index d34f781..acbeae1 --- a/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigBuilder.java @@ -20,6 +20,8 @@ /** * Builder class that provides handy methods to construct {@link ExtractorTransformConfig} for testing. + * + * @param Generic type. */ public abstract class ExtractorTransformConfigBuilder extends CloudVisionConfigBuilder { diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfigTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigBuilder.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/document/DocumentExtractorTransformConfigTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigBuilder.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigBuilder.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/image/ImageExtractorTransformConfigTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/BaseAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/BaseAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/CropHintsAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 index cf117a9..63d15ee --- a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FaceAnnotationsToRecordTransformerTest.java @@ -152,9 +152,9 @@ private void assertAnnotationEquals(FaceAnnotation expected, StructuredRecord ac Assert.assertEquals(expected.getUnderExposedLikelihood().name(), actual.get(FaceAnnotationSchema.UNDER_EXPOSED_FIELD_NAME)); - List position = actual.get(FaceAnnotationSchema.POSITION_FIELD_NAME); + List position = actual.get(FaceAnnotationSchema.BOUNDING_POLY_NAME); assertPositionEqual(expected.getBoundingPoly(), position); - List fdPosition = actual.get(FaceAnnotationSchema.FD_POSITION_FIELD_NAME); + List fdPosition = actual.get(FaceAnnotationSchema.FD_BOUNDING_POLY_NAME); assertPositionEqual(expected.getFdBoundingPoly(), fdPosition); List landmarks = actual.get(FaceAnnotationSchema.LANDMARKS_FIELD_NAME); assertLandmarksEqual(expected.getLandmarksList(), landmarks); diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 index bf0b9f0..04fe9c9 --- a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/FullTextAnnotationsToRecordTransformerTest.java @@ -190,7 +190,7 @@ private void assertLanguageEquals(TextAnnotation.DetectedLanguage expected, Stru actual.get(FullTextAnnotationSchema.DetectedLanguage.CONFIDENCE_FIELD_NAME), DELTA); Assert.assertEquals(expected.getLanguageCode(), - actual.get(FullTextAnnotationSchema.DetectedLanguage.CODE_FIELD_NAME)); + actual.get(FullTextAnnotationSchema.DetectedLanguage.LANGUAGE_CODE_FIELD_NAME)); } private void assertBlockEquals(Block expected, StructuredRecord actual) { diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ImagePropertiesAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LabelAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LandmarkAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LocalizedObjectAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/LogoAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/ProductSearchResultToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/SafeSearchAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/TextAnnotationsToRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionRecordTransformerTest.java b/src/test/java/io/cdap/plugin/cloud/vision/transform/transformer/WebDetectionRecordTransformerTest.java old mode 100644 new mode 100755 diff --git a/widgets/TextExtractorOffline-action.json b/widgets/OfflineTextExtractor-action.json similarity index 98% rename from widgets/TextExtractorOffline-action.json rename to widgets/OfflineTextExtractor-action.json index 2571335..f3a7ce8 100644 --- a/widgets/TextExtractorOffline-action.json +++ b/widgets/OfflineTextExtractor-action.json @@ -2,7 +2,7 @@ "metadata": { "spec-version": "1.5" }, - "display-name": "Text Extractor Offline", + "display-name": "Offline Text Extractor", "configuration-groups": [ { "label": "Basic", @@ -33,7 +33,8 @@ "default": "application/pdf", "values": [ "application/pdf", - "image/tiff" + "image/tiff", + "image/gif" ] } }, From 26176819ee72182dcabbb305581841a6f68f66f9 Mon Sep 17 00:00:00 2001 From: Patrice Borne Date: Fri, 10 Apr 2020 11:57:04 -0400 Subject: [PATCH 2/3] Fixes based on feedback --- checkstyle.xml | 27 +- pom.xml | 66 +---- .../cloud/vision/action/ActionConstants.java | 3 +- .../cloud/vision/action/package-info.java | 20 -- .../cloud/vision/exception/package-info.java | 20 -- .../plugin/cloud/vision/package-info.java | 20 -- .../transform/ExtractorTransformConfig.java | 5 +- .../transform/document/package-info.java | 20 -- .../vision/transform/image/package-info.java | 20 -- .../cloud/vision/transform/package-info.java | 20 -- .../vision/transform/schema/package-info.java | 20 -- .../transform/transformer/package-info.java | 20 -- .../cloud/vision/GcsBucketHelperTest.java | 11 +- .../OfflineImageExtractorActionTest.java | 242 +++++++++--------- .../plugin/cloud/vision/package-info.java | 20 -- 15 files changed, 153 insertions(+), 381 deletions(-) delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java delete mode 100644 src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java delete mode 100644 src/test/java/io/cdap/plugin/cloud/vision/package-info.java diff --git a/checkstyle.xml b/checkstyle.xml index 2cc4e9f..48560de 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -17,7 +17,7 @@ + "http://www.puppycrawl.com/dtds/configuration_1_3.dtd"> + + + - + - \ No newline at end of file + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 01c2d23..5256ce0 100644 --- a/pom.xml +++ b/pom.xml @@ -333,7 +333,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.1 + 2.17 validate @@ -356,7 +356,7 @@ com.puppycrawl.tools checkstyle - 8.18 + 6.19 @@ -380,68 +380,6 @@ - - org.apache.maven.plugins - maven-enforcer-plugin - 3.0.0-M3 - - - enforce-version - - enforce - - - - - - - log4j:log4j:[0.0,1.2.17) - - - - - - - - - org.apache.felix - maven-bundle-plugin - 3.5.0 - true - - - <_exportcontents> - com.google.api,com.google.api.client.*,com.google.api.core,com.google.api.gax.batching, - com.google.api.gax.core,com.google.api.gax.grpc,com.google.api.gax.longrunning, - com.google.api.gax.paging,com.google.api.gax.retrying,com.google.api.gax.rpc, - com.google.api.gax.tracing,com.google.api.resourcenames, - com.google.api.services.storage.model,com.google.auth.*,com.google.cloud, - com.google.cloud.http,com.google.cloud.spi,com.google.cloud.storage, - com.google.cloud.storage.spi,com.google.cloud.storage.spi.v1,com.google.cloud.vision.v1.*, - com.google.common.base,com.google.common.collect,com.google.common.graph, - com.google.common.hash,com.google.common.io,com.google.common.util.concurrent, - com.google.common.util.concurrent.internal,com.google.errorprone.annotations, - com.google.iam.v1,com.google.longrunning,com.google.longrunning.stub,com.google.protobuf.*, - com.google.rpc,com.google.type,io.cdap.plugin.cloud.vision.*,io.grpc,io.grpc.stub, - io.opencensus.common,io.opencensus.trace,io.opencensus.trace.config, - io.opencensus.trace.export,io.opencensus.trace.propagation, - org.checkerframework.checker.nullness.compatqual,org.checkerframework.checker.nullness.qual, - org.checkerframework.framework.qual,org.threeten.bp,org.threeten.bp.* - - *;inline=false;scope=compile - true - lib - - - - - package - - bundle - - - - diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java b/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java index b21bf6a..64a14eb 100755 --- a/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/action/ActionConstants.java @@ -34,8 +34,7 @@ public final class ActionConstants { */ public static final String BATCH_SIZE = "batchSize"; /** - * Configuration property name used to specify the features to extract - * from images. + * Configuration property name used to specify the features to extract from images. */ public static final String FEATURES = "features"; /** diff --git a/src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java deleted file mode 100644 index 2718b39..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/action/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Actions to build CDAP pipelines that use the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision.action; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java deleted file mode 100644 index 4854c8e..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/exception/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Exception when there is a problem with using the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision.exception; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/package-info.java deleted file mode 100644 index 5baad12..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Transformers and Actions to build CDAP pipelines that use the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java index de98b16..4f4d849 100755 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java +++ b/src/main/java/io/cdap/plugin/cloud/vision/transform/ExtractorTransformConfig.java @@ -48,6 +48,7 @@ public class ExtractorTransformConfig extends CloudVisionConfig { @Name(ExtractorTransformConstants.OUTPUT_FIELD) @Description("Field to store the extracted image features. If the specified output field name already exists " + "in the input record, it will be overwritten.") + @Macro private String outputField; @Name(ExtractorTransformConstants.FEATURES) @@ -199,8 +200,8 @@ public static void validateFieldsMatch(Schema inferredSchema, Schema providedSch Schema inferredFieldNonNullableSchema = isInferredFieldNullable ? inferredFieldSchema.getNonNullable() : inferredFieldSchema; - Schema providedFieldNonNullableSchema = isProvidedFieldNullable - ? providedFieldSchema.getNonNullable() : providedFieldSchema; + Schema providedFieldNonNullableSchema = isProvidedFieldNullable ? + providedFieldSchema.getNonNullable() : providedFieldSchema; Schema.Type inferredType = inferredFieldNonNullableSchema.getType(); Schema.LogicalType inferredLogicalType = inferredFieldNonNullableSchema.getLogicalType(); diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java deleted file mode 100644 index 023e20a..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/document/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Contains classes to implement the Document Extraction Transform using the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision.transform.document; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java deleted file mode 100644 index fdf0d18..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/image/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Contains classes to implement the Image Extraction Transform using the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision.transform.image; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java deleted file mode 100644 index b079416..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Transformers to build CDAP pipelines that use the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision.transform; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java deleted file mode 100644 index 7dcaba8..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/schema/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Contains classes to implement the Schemas used by the Actions and Transformers using the Cloud Vision API. - */ -package io.cdap.plugin.cloud.vision.transform.schema; diff --git a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java b/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java deleted file mode 100644 index 744d2b7..0000000 --- a/src/main/java/io/cdap/plugin/cloud/vision/transform/transformer/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Classes that map answers from the Cloud Vision API to Records that can then be turned into json docs. - */ -package io.cdap.plugin.cloud.vision.transform.transformer; diff --git a/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java b/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java index 87b446a..07b7a68 100644 --- a/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/GcsBucketHelperTest.java @@ -31,18 +31,11 @@ */ public class GcsBucketHelperTest { protected static final String PATH = System.getProperty("path", "gs://vision-api-pbo2/images"); - protected static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", "auto-detect"); + protected static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", + "auto-detect"); // This is a limit imposed by the Cloud Vision API protected static final int MAX_NUMBER_OF_IMAGES_PER_BATCH = 2000; - @Before - public void setUp() throws Exception { - } - - @After - public void tearDown() throws Exception { - } - @Test public void getAllFilesInPath() throws Exception { Credentials credentials = CredentialsHelper.getCredentials(SERVICE_ACCOUNT_FILE_PATH); diff --git a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java index e1d81b6..49e110e 100755 --- a/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java +++ b/src/test/java/io/cdap/plugin/cloud/vision/action/OfflineImageExtractorActionTest.java @@ -35,6 +35,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; + import java.io.FileInputStream; import java.io.FileNotFoundException; import java.util.ArrayList; @@ -48,8 +49,8 @@ public class OfflineImageExtractorActionTest { private static final String PROJECT = System.getProperty("project", CloudVisionConstants.AUTO_DETECT); private static final String SERVICE_ACCOUNT_FILE_PATH = System.getProperty("serviceFilePath", - CloudVisionConstants.AUTO_DETECT); - private static final String BUCKET_NAME = System.getProperty("path", "gs://cloud-vision-cdap-tests"); + CloudVisionConstants.AUTO_DETECT); + private static final String PATH = System.getProperty("path", "gs://cloud-vision-cdap-tests"); private static final String IMAGE_PATH_IN_BUCKET = "images"; private static final String PATH_PATTERN = "%s/%s/"; @@ -70,40 +71,40 @@ public class OfflineImageExtractorActionTest { private static final String TEXT_IMAGE_NAME = "sign.jpg"; private static final String WEB_DETECTION_IMAGE_NAME = "carnaval.jpg"; - private static final String CROP_HINTS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + CROP_HINTS_IMAGE_NAME; - private static final String FACE_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + FACE_IMAGE_NAME; - private static final String LABELS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + LABELS_IMAGE_NAME; - private static final String LANDMARKS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + LANDMARKS_IMAGE_NAME; - private static final String LOGOS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + LOGOS_IMAGE_NAME; - private static final String OBJECTS_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + OBJECT_IMAGE_NAME; - private static final String PROPERTIES_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + PROPERTIES_IMAGE_NAME; - private static final String SAFE_SEARCH_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + SAFE_SEARCH_IMAGE_NAME; - private static final String TEXT_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET + "/" - + TEXT_IMAGE_NAME; - private static final String WEB_DETECTION_IMAGE_FILE_PATH = BUCKET_NAME + "/" + IMAGE_PATH_IN_BUCKET - + "/" + WEB_DETECTION_IMAGE_NAME; + private static final String CROP_HINTS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + CROP_HINTS_IMAGE_NAME; + private static final String FACE_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + FACE_IMAGE_NAME; + private static final String LABELS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + LABELS_IMAGE_NAME; + private static final String LANDMARKS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + LANDMARKS_IMAGE_NAME; + private static final String LOGOS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + LOGOS_IMAGE_NAME; + private static final String OBJECTS_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + OBJECT_IMAGE_NAME; + private static final String PROPERTIES_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + PROPERTIES_IMAGE_NAME; + private static final String SAFE_SEARCH_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + SAFE_SEARCH_IMAGE_NAME; + private static final String TEXT_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + "/" + + TEXT_IMAGE_NAME; + private static final String WEB_DETECTION_IMAGE_FILE_PATH = PATH + "/" + IMAGE_PATH_IN_BUCKET + + "/" + WEB_DETECTION_IMAGE_NAME; private static final String RESULT_FILE_NAME = "output-1-to-1.json"; private static final String BATCH_SIZE = "20"; private static final OfflineImageExtractorActionConfig CONFIG = new OfflineImageExtractorActionConfig( - SERVICE_ACCOUNT_FILE_PATH, - ImageFeature.FACE.getDisplayName(), - null, - null, - true, - null, - null, - BATCH_SIZE + SERVICE_ACCOUNT_FILE_PATH, + ImageFeature.FACE.getDisplayName(), + null, + null, + true, + null, + null, + BATCH_SIZE ); private static Storage storage; @@ -115,9 +116,9 @@ private static void createFolder(String folderName) throws FileNotFoundException System.out.println("Creating folder: " + folderName); FileInputStream fileInputStream = new FileInputStream("examples/empty_file.txt"); bucket.create(folderName + "/", - fileInputStream, - FOLDER_CONTENT_TYPE, - Bucket.BlobWriteOption.doesNotExist()); + fileInputStream, + FOLDER_CONTENT_TYPE, + Bucket.BlobWriteOption.doesNotExist()); } private static void uploadImages() throws FileNotFoundException { @@ -139,30 +140,10 @@ private static void uploadImages() throws FileNotFoundException { FileInputStream fileInputStream = new FileInputStream(localImagePath); System.out.println("Uploading " + localImagePath + " to " + bucket.getName()); bucket.create(IMAGE_PATH_IN_BUCKET + "/" + imageName, - fileInputStream, - JPG_CONTENT_TYPE, - Bucket.BlobWriteOption.doesNotExist()); - } - } - - private static Storage getStorage(String project, @Nullable Credentials credentials) { - StorageOptions.Builder builder = StorageOptions.newBuilder().setProjectId(project); - if (credentials != null) { - builder.setCredentials(credentials); - } - return builder.build().getService(); - } - - private static void deleteBucket(Storage storage, Bucket bucket) { - if (bucket == null || bucket.list() == null) { - return; + fileInputStream, + JPG_CONTENT_TYPE, + Bucket.BlobWriteOption.doesNotExist()); } - System.out.println("Deleting bucket: " + bucket.getName()); - for (Blob blob : bucket.list().iterateAll()) { - storage.delete(blob.getBlobId()); - } - - bucket.delete(Bucket.BucketSourceOption.metagenerationMatch()); } @Before @@ -171,7 +152,7 @@ public void testSetup() throws Exception { String projectId = CredentialsHelper.getProjectId(PROJECT); storage = getStorage(projectId, credentials); - GCSPath path = GCSPath.from(BUCKET_NAME); + GCSPath path = GCSPath.from(PATH); String bucketName = path.getBucket(); bucket = storage.get(bucketName); if (bucket != null) { @@ -180,9 +161,9 @@ public void testSetup() throws Exception { System.out.println("Creating bucket: " + bucketName); BucketInfo bucketInfo = BucketInfo.newBuilder(bucketName) - .setStorageClass(StorageClass.STANDARD) - .setLocation("us-east1") - .build(); + .setStorageClass(StorageClass.STANDARD) + .setLocation("us-east1") + .build(); bucket = storage.create(bucketInfo); uploadImages(); } @@ -195,17 +176,17 @@ public void destroy() { @Test public void testRunCropHintsFeature() throws Exception { String folder = "crop_hints"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunCropHintsFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(CROP_HINTS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.CROP_HINTS.getDisplayName()) - .setAspectRatios("1.66667") - .build(); + .setSourcePath(CROP_HINTS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.CROP_HINTS.getDisplayName()) + .setAspectRatios("1.66667") + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -217,15 +198,15 @@ public void testRunCropHintsFeature() throws Exception { @Test public void testRunFaceFeature() throws Exception { String folder = "face"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunFaceFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(FACE_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .build(); + .setSourcePath(FACE_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -237,16 +218,16 @@ public void testRunFaceFeature() throws Exception { @Test public void testRunImagePropertiesFeature() throws Exception { String folder = "properties"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunImagePropertiesFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(PROPERTIES_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.IMAGE_PROPERTIES.getDisplayName()) - .build(); + .setSourcePath(PROPERTIES_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.IMAGE_PROPERTIES.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -258,16 +239,16 @@ public void testRunImagePropertiesFeature() throws Exception { @Test public void testRunLabelsFeature() throws Exception { String folder = "labels"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunLabelsFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(LABELS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.LABELS.getDisplayName()) - .build(); + .setSourcePath(LABELS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.LABELS.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -279,16 +260,16 @@ public void testRunLabelsFeature() throws Exception { @Test public void testRunLandmarksFeature() throws Exception { String folder = "landmarks"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunLandmarksFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(LANDMARKS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.LANDMARKS.getDisplayName()) - .build(); + .setSourcePath(LANDMARKS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.LANDMARKS.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -300,16 +281,16 @@ public void testRunLandmarksFeature() throws Exception { @Test public void testRunLogosFeature() throws Exception { String folder = "logos"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); createFolder(folder); System.out.println("testRunLogosFeature() - resultFolderPath: " + resultFolderPath); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(LOGOS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.LOGOS.getDisplayName()) - .build(); + .setSourcePath(LOGOS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.LOGOS.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -321,16 +302,16 @@ public void testRunLogosFeature() throws Exception { @Test public void testRunObjectLocalizationFeature() throws Exception { String folder = "objects"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunObjectLocalizationFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(OBJECTS_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.OBJECT_LOCALIZATION.getDisplayName()) - .build(); + .setSourcePath(OBJECTS_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.OBJECT_LOCALIZATION.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -342,17 +323,17 @@ public void testRunObjectLocalizationFeature() throws Exception { @Test public void testRunTextFeature() throws Exception { String folder = "text"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunTextFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(TEXT_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.TEXT.getDisplayName()) - .setLanguageHints("en") - .build(); + .setSourcePath(TEXT_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.TEXT.getDisplayName()) + .setLanguageHints("en") + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -364,16 +345,16 @@ public void testRunTextFeature() throws Exception { @Test public void testRunSafeSearchFeature() throws Exception { String folder = "safe"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunSafeSearchFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(SAFE_SEARCH_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.EXPLICIT_CONTENT.getDisplayName()) - .build(); + .setSourcePath(SAFE_SEARCH_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.EXPLICIT_CONTENT.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -385,16 +366,16 @@ public void testRunSafeSearchFeature() throws Exception { @Test public void testRunWebDetectionFeature() throws Exception { String folder = "web"; - String resultFolderPath = String.format(PATH_PATTERN, BUCKET_NAME, folder); + String resultFolderPath = String.format(PATH_PATTERN, PATH, folder); System.out.println("testRunSafeSearchFeature() - resultFolderPath: " + resultFolderPath); createFolder(folder); OfflineImageExtractorActionConfig config = OfflineImageExtractorActionConfig.builder(CONFIG) - .setSourcePath(WEB_DETECTION_IMAGE_FILE_PATH) - .setDestinationPath(resultFolderPath) - .setFeatures(ImageFeature.WEB_DETECTION.getDisplayName()) - .build(); + .setSourcePath(WEB_DETECTION_IMAGE_FILE_PATH) + .setDestinationPath(resultFolderPath) + .setFeatures(ImageFeature.WEB_DETECTION.getDisplayName()) + .build(); OfflineImageExtractorAction action = new OfflineImageExtractorAction(config); ActionContext context = new MockActionContext(); @@ -414,15 +395,34 @@ private void validateResult(String folder, String expectedUri) throws Interrupte JsonParser parser = new JsonParser(); JsonElement jsonTree = parser.parse(content); String uri = jsonTree.getAsJsonObject() - .get("responses") - .getAsJsonArray() - .get(0) - .getAsJsonObject() - .get("context") - .getAsJsonObject() - .get("uri") - .getAsString(); - - Assert.assertTrue(expectedUri.equals(uri)); + .get("responses") + .getAsJsonArray() + .get(0) + .getAsJsonObject() + .get("context") + .getAsJsonObject() + .get("uri") + .getAsString(); + + Assert.assertEquals(expectedUri, uri); + } + + private static Storage getStorage(String project, @Nullable Credentials credentials) { + StorageOptions.Builder builder = StorageOptions.newBuilder().setProjectId(project); + if (credentials != null) { + builder.setCredentials(credentials); + } + return builder.build().getService(); + } + + private static void deleteBucket(Storage storage, Bucket bucket) { + if (bucket == null || bucket.list() == null) { + return; + } + System.out.println("Deleting bucket: " + bucket.getName()); + for (Blob blob : bucket.list().iterateAll()) { + storage.delete(blob.getBlobId()); + } + bucket.delete(Bucket.BucketSourceOption.metagenerationMatch()); } } diff --git a/src/test/java/io/cdap/plugin/cloud/vision/package-info.java b/src/test/java/io/cdap/plugin/cloud/vision/package-info.java deleted file mode 100644 index 2d89acb..0000000 --- a/src/test/java/io/cdap/plugin/cloud/vision/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright © 2019 Cask Data, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Contains JUnit tests. - */ -package io.cdap.plugin.cloud.vision; From 6b65789f0cd4bf947085270c0d116304724927c3 Mon Sep 17 00:00:00 2001 From: Patrice Borne Date: Thu, 30 Apr 2020 13:39:16 -0400 Subject: [PATCH 3/3] Fixes to the pom and the icon for an Action --- ...on.png => OfflineTextExtractor-action.png} | Bin pom.xml | 62 ++++++++++++++++++ 2 files changed, 62 insertions(+) rename icons/{TextExtractorOffline-action.png => OfflineTextExtractor-action.png} (100%) diff --git a/icons/TextExtractorOffline-action.png b/icons/OfflineTextExtractor-action.png similarity index 100% rename from icons/TextExtractorOffline-action.png rename to icons/OfflineTextExtractor-action.png diff --git a/pom.xml b/pom.xml index 5256ce0..1152a7d 100644 --- a/pom.xml +++ b/pom.xml @@ -380,6 +380,68 @@ + + org.apache.maven.plugins + maven-enforcer-plugin + 3.0.0-M3 + + + enforce-version + + enforce + + + + + + + log4j:log4j:[0.0,1.2.17) + + + + + + + + + org.apache.felix + maven-bundle-plugin + 3.5.0 + true + + + <_exportcontents> + com.google.api,com.google.api.client.*,com.google.api.core,com.google.api.gax.batching, + com.google.api.gax.core,com.google.api.gax.grpc,com.google.api.gax.longrunning, + com.google.api.gax.paging,com.google.api.gax.retrying,com.google.api.gax.rpc, + com.google.api.gax.tracing,com.google.api.resourcenames, + com.google.api.services.storage.model,com.google.auth.*,com.google.cloud, + com.google.cloud.http,com.google.cloud.spi,com.google.cloud.storage, + com.google.cloud.storage.spi,com.google.cloud.storage.spi.v1,com.google.cloud.vision.v1.*, + com.google.common.base,com.google.common.collect,com.google.common.graph, + com.google.common.hash,com.google.common.io,com.google.common.util.concurrent, + com.google.common.util.concurrent.internal,com.google.errorprone.annotations, + com.google.iam.v1,com.google.longrunning,com.google.longrunning.stub,com.google.protobuf.*, + com.google.rpc,com.google.type,io.cdap.plugin.cloud.vision.*,io.grpc,io.grpc.stub, + io.opencensus.common,io.opencensus.trace,io.opencensus.trace.config, + io.opencensus.trace.export,io.opencensus.trace.propagation, + org.checkerframework.checker.nullness.compatqual,org.checkerframework.checker.nullness.qual, + org.checkerframework.framework.qual,org.threeten.bp,org.threeten.bp.* + + *;inline=false;scope=compile + true + lib + + + + + package + + bundle + + + +