Skip to content

Commit

Permalink
Merge pull request pentaho#9305 from nawaz34-hitachivantara/BACKLOG-3…
Browse files Browse the repository at this point in the history
…9677-VFS

[BACKLOG-39677] Added root path code changes for VFS connection.
  • Loading branch information
NJtwentyone authored May 14, 2024
2 parents 73494ff + 706cfc4 commit 203e2b8
Show file tree
Hide file tree
Showing 8 changed files with 425 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@

package org.pentaho.di.connections;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
import org.pentaho.di.connections.utils.EncryptUtils;
import org.pentaho.di.core.bowl.Bowl;
import org.pentaho.di.connections.utils.VFSConnectionTestOptions;
import org.pentaho.di.connections.vfs.VFSConnectionDetails;
import org.pentaho.di.connections.vfs.VFSConnectionProvider;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.metastore.api.IMetaStore;
import org.pentaho.metastore.api.exceptions.MetaStoreException;
import org.pentaho.metastore.persist.MetaStoreFactory;
Expand All @@ -39,6 +42,7 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.Objects;

import static org.pentaho.metastore.util.PentahoDefaults.NAMESPACE;

Expand Down Expand Up @@ -291,6 +295,27 @@ public <T extends ConnectionDetails> boolean test( T connectionDetails ) throws
return connectionProvider.test( connectionDetails );
}

/**
 * Checks whether a VFS connection is valid, honoring the given testing options.
 * <p>
 * The provider registered under the connection's type is looked up and the actual test is delegated to it.
 *
 * @param connectionDetails The VFS connection to test; must not be {@code null}.
 * @param options           The testing options, or {@code null}. When {@code null}, a default instance of
 *                          {@link VFSConnectionTestOptions} is constructed and used.
 * @return {@code true} if the connection is valid; {@code false}, otherwise.
 * @throws KettleException When thrown by the provider's test.
 */
public <T extends VFSConnectionDetails> boolean test( @NonNull T connectionDetails,
                                                      @Nullable VFSConnectionTestOptions options )
  throws KettleException {
  Objects.requireNonNull( connectionDetails );

  // Fall back to default options without reassigning the method parameter.
  VFSConnectionTestOptions effectiveOptions = options != null ? options : new VFSConnectionTestOptions();

  VFSConnectionProvider<T> provider =
    (VFSConnectionProvider<T>) connectionProviders.get( connectionDetails.getType() );

  return provider.test( connectionDetails, effectiveOptions );
}

/**
* Delete a connection by name from the default
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2024 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/

package org.pentaho.di.connections.utils;

/**
 * Holds the options that control how a VFS connection is tested.
 */
public class VFSConnectionTestOptions {

  /** When {@code true}, the root path is not taken into account by the connection test. */
  private boolean ignoreRootPath;

  /**
   * Creates testing options in which the root path is not ignored.
   */
  public VFSConnectionTestOptions() {
    this( false );
  }

  /**
   * Creates testing options with an explicit "ignore root path" flag.
   *
   * @param ignoreRootPath {@code true} to ignore the root path; {@code false}, otherwise.
   */
  public VFSConnectionTestOptions( boolean ignoreRootPath ) {
    this.ignoreRootPath = ignoreRootPath;
  }

  /**
   * Tells whether the root path should be ignored when testing the connection.
   *
   * @return {@code true}, if the root path should be ignored; {@code false}, otherwise.
   */
  public boolean isIgnoreRootPath() {
    return this.ignoreRootPath;
  }

  /**
   * Changes whether the root path should be ignored when testing the connection.
   *
   * @param ignoreRootPath {@code true} to ignore the root path; {@code false}, otherwise.
   */
  public void setIgnoreRootPath( boolean ignoreRootPath ) {
    this.ignoreRootPath = ignoreRootPath;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
package org.pentaho.di.connections.vfs;

import edu.umd.cs.findbugs.annotations.NonNull;
import org.apache.commons.lang.StringUtils;
import org.pentaho.metastore.persist.MetaStoreAttribute;

import java.util.ArrayList;
Expand All @@ -39,6 +40,9 @@ public abstract class BaseVFSConnectionDetails implements VFSConnectionDetails {
// Annotated as a metastore attribute; starts out as an empty list rather than null.
// NOTE(review): presumably the value exposed by getBaRoles() - its body is not visible here, confirm.
@MetaStoreAttribute
private List<String> baRoles = new ArrayList<>();

// Root path of this connection, annotated as a metastore attribute.
// setRootPath stores an empty value as null, so a value assigned through the setter is either null or non-empty.
@MetaStoreAttribute
private String rootPath;

@NonNull
@Override
public List<String> getBaRoles() {
Expand All @@ -52,6 +56,26 @@ public Map<String, String> getProperties() {
return props;
}

/**
* Indicates whether this VFS connection supports a root path.
* <p>
* This base implementation always returns {@code true}.
*
* @return {@code true} if the VFS connection supports a root path; {@code false}, otherwise.
*/
@Override
public boolean isSupportsRootPath() {
return true;
}

/**
* Gets the root path stored in this connection, without any further processing.
*
* @return The stored root path; may be {@code null}.
*/
@Override
public String getRootPath() {
return rootPath;
}

/**
 * Stores the root path of this connection.
 * <p>
 * A {@code null} or empty value is stored as {@code null}; any other value is stored unchanged.
 *
 * @param rootPath The root path, possibly {@code null} or empty.
 */
@Override
public void setRootPath( String rootPath ) {
  if ( StringUtils.isEmpty( rootPath ) ) {
    this.rootPath = null;
  } else {
    this.rootPath = rootPath;
  }
}

/**
* Adds base/default properties to properties of connection instance.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,35 @@

package org.pentaho.di.connections.vfs;

import edu.umd.cs.findbugs.annotations.NonNull;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.pentaho.di.connections.ConnectionDetails;
import org.pentaho.di.connections.ConnectionManager;
import org.pentaho.di.connections.utils.VFSConnectionTestOptions;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.value.ValueMetaBase;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.core.vfs.KettleVFS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Objects;
import java.util.function.Supplier;

import static org.pentaho.di.connections.vfs.provider.ConnectionFileObject.DELIMITER;

public abstract class BaseVFSConnectionProvider<T extends VFSConnectionDetails> implements VFSConnectionProvider<T> {

// Source of the ConnectionManager used by this provider; defaults to ConnectionManager::getInstance.
private Supplier<ConnectionManager> connectionManagerSupplier = ConnectionManager::getInstance;

// Class-level SLF4J logger.
private static final Logger LOGGER = LoggerFactory.getLogger( BaseVFSConnectionProvider.class );

@Override
public List<String> getNames() {
return connectionManagerSupplier.get().getNamesByType( getClass() );
Expand Down Expand Up @@ -90,4 +103,74 @@ protected static boolean getBooleanValueOfVariable( VariableSpace space, String
/**
 * Gets the variable space to use for the given connection.
 *
 * @param connectionDetails The connection details.
 * @return The connection's own variable space when it has one; otherwise, the value of
 * {@code Variables.getADefaultVariableSpace()}.
 */
protected VariableSpace getSpace( ConnectionDetails connectionDetails ) {
  if ( connectionDetails.getSpace() == null ) {
    return Variables.getADefaultVariableSpace();
  }
  return connectionDetails.getSpace();
}

/**
 * Tests if a given VFS connection is valid, taking the given testing options into account.
 * <p>
 * The steps are, in order:
 * <ol>
 *   <li>the basic test, {@code test( connectionDetails )}, must succeed;</li>
 *   <li>if the connection does not support a root path, or the options ask to ignore it, the connection is
 *   valid;</li>
 *   <li>if the resolved root path is empty, the connection is valid only when a root path is not required;</li>
 *   <li>otherwise, the root path must identify an existing folder.</li>
 * </ol>
 * A {@link FileSystemException} raised while checking the root folder is logged and reported as an invalid
 * connection.
 *
 * @param connectionDetails     The VFS connection.
 * @param connectionTestOptions The testing options.
 * @return {@code true} if the connection is valid; {@code false}, otherwise.
 * @throws KettleException When thrown by the basic test or while obtaining the root folder's file object.
 */
@Override
public boolean test( @NonNull T connectionDetails, @NonNull VFSConnectionTestOptions connectionTestOptions )
  throws KettleException {
  boolean valid = test( connectionDetails );
  if ( !valid ) {
    return false;
  }

  if ( !connectionDetails.isSupportsRootPath() || connectionTestOptions.isIgnoreRootPath() ) {
    return true;
  }

  String resolvedRootPath = getResolvedRootPath( connectionDetails );
  if ( StringUtils.isEmpty( resolvedRootPath ) ) {
    return !connectionDetails.isRootPathRequired();
  }

  String internalUrl = buildUrl( connectionDetails, resolvedRootPath );
  // NOTE(review): an empty variable space is passed here, presumably because the root path has already gone
  // through variable substitution in getResolvedRootPath - confirm.
  FileObject fileObject = KettleVFS.getFileObject( internalUrl, new Variables(), getOpts( connectionDetails ) );

  try {
    return fileObject.exists() && this.isFolder( fileObject );
  } catch ( FileSystemException fileSystemException ) {
    // Hand the exception itself to the logger so that the stack trace and cause chain are kept.
    LOGGER.error( fileSystemException.getMessage(), fileSystemException );
    return false;
  }
}

/**
 * Gets the root path of the given connection after variable substitution and normalization.
 *
 * @param connectionDetails The VFS connection.
 * @return The normalized root path; an empty string when the connection has no root path or when the
 * substituted value is blank.
 */
@Override
public String getResolvedRootPath( @NonNull T connectionDetails ) {
  // Nothing configured: there is nothing to resolve.
  if ( StringUtils.isEmpty( connectionDetails.getRootPath() ) ) {
    return StringUtils.EMPTY;
  }

  VariableSpace variables = getSpace( connectionDetails );
  String substituted = getVar( connectionDetails.getRootPath(), variables );
  if ( StringUtils.isNotBlank( substituted ) ) {
    return normalizeRootPath( substituted );
  }

  return StringUtils.EMPTY;
}

/**
 * Normalizes a root path so that it starts with the delimiter and does not end with it.
 * <p>
 * A {@code null} or empty value is returned as is. At most one trailing character is removed.
 *
 * @param rootPath The root path to normalize.
 * @return The normalized root path.
 */
private String normalizeRootPath( String rootPath ) {
  if ( StringUtils.isEmpty( rootPath ) ) {
    return rootPath;
  }

  String normalized = rootPath.startsWith( DELIMITER ) ? rootPath : DELIMITER + rootPath;
  if ( normalized.endsWith( DELIMITER ) ) {
    normalized = normalized.substring( 0, normalized.length() - 1 );
  }
  return normalized;
}

/**
 * Builds the provider URL for the given root path of a connection.
 * <p>
 * The result is the connection type, followed by {@code :/}, then the delimiter plus the domain when the
 * connection has one, and finally the given root path. A {@code null} domain is handled like an empty one,
 * instead of failing with a {@link NullPointerException}.
 *
 * @param connectionDetails The VFS connection.
 * @param rootPath          The root path, appended unchanged; callers pass the value returned by
 *                          {@code getResolvedRootPath}.
 * @return The provider URL.
 */
private String buildUrl( VFSConnectionDetails connectionDetails, String rootPath ) {
  String domain = connectionDetails.getDomain();
  // The domain is optional: only contribute a segment when there is one.
  String domainSegment = ( domain == null || domain.isEmpty() ) ? "" : DELIMITER + domain;
  return connectionDetails.getType() + ":/" + domainSegment + rootPath;
}

/**
 * Determines whether the given file object is of type folder.
 *
 * @param fileObject The file object to check.
 * @return {@code true} if the file's type is {@link FileType#FOLDER}; {@code false} if it is any other type,
 * {@code null}, or cannot be determined.
 */
private boolean isFolder( @NonNull FileObject fileObject ) {
  try {
    // FileType is an enum: identity comparison is null-safe and the type only needs to be fetched once.
    return fileObject.getType() == FileType.FOLDER;
  } catch ( FileSystemException e ) {
    // Best effort: a type that cannot be determined is reported as "not a folder".
    return false;
  }
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,83 @@ default boolean isRootPathRequired() {
}

/**
* Gets the root folder path of this VFS connection.
* <p>
* The root folder path allows limiting the files exposed through a <code>pvfs</code> URL.
* <p>
* The default interface implementation exists to ensure backward compatibility and returns {@code null}.
* <h3>
* Semantics of the Root Folder Path
* </h3>
* Assume a connection without a configured root folder path, <code>connection-name</code>.
* The general structure of a <code>pvfs</code> URL that resolves to a file in this connection is
* <code>pvfs://(connection-name)/(rest-path)</code>.
* If the <code>rest-path</code> component is split in two parts, the root path and the remainder,
* the following form is achieved: <code>pvfs://(connection-name)/(root-path)/(rest-rest-path)</code>.
* <p>
* Assume a connection configured with the root folder path <code>root-path</code>, all other configurations equal,
* named <code>connection-with-root-path</code>.
* The same file would be exposed by a <code>pvfs</code> URL in which the <code>root-path</code> component is omitted:
* <code>pvfs://(connection-with-root-path)/(rest-rest-path)</code>.
* <p>
* Necessarily, the configured root path must identify a file of type folder.
* <p>
* Files which are not descendant of a connection's root folder path cannot be identified/accessed using a
* <code>pvfs</code> URL. Folder segments of a <code>pvfs</code> URL cannot have the special names <code>.</code> or
* <code>..</code>.
* <h3>
* Syntax of the Root Folder Path
* </h3>
* The syntax of the root folder path is that of one or more folder names separated by a folder separator,
* <code>/</code>. For example, the following would be syntactically valid: <code>my-vfs-bucket/my-folder</code>.
* While a leading or a trailing folder separator should be tolerated, a <i>normalized</i> root folder path
* should have none.
* <p>
* The value stored in this property is subject to variable substitution and thus may not conform to the syntax
* of a root folder path. The syntax is validated only after variable substitution is performed.
* <h3>
* Impact of Root Folder Path on Provider URLs
* </h3>
* While omitted from the <code>pvfs</code> URL, the root folder path is incorporated in the <i>provider-specific</i>
* (a.k.a. internal) URL, as a result of the conversion process from <code>pvfs</code> to <code>provider</code> URL.
* The root folder path is not a required component of provider URLs, and files which are not descendants of the root
* folder path are still resolvable. The root folder path is not a security feature, by itself.
* <p>
* The general structure of a provider URL corresponding to the above <code>pvfs</code> URL is like:
* <code>(scheme):// [(domain) /] [(root-path) /] [(rest-rest-path)]</code>
* <p>
* Where the <i>scheme</i> component is given by the {@link #getType()} property, and the <i>domain</i> component is
* given by the {@link #getDomain()} property.
* <p>
* The provider URL structure for specific providers may vary from this general structure. However, the semantics of
* the root folder path property should be respected.
* <h3>
* Examples of <code>pvfs</code> and Provider URLs
* </h3>
* Given an S3 connection, with a configured root folder path of <code>my-bucket/my-folder</code>,
* the <code>pvfs</code> URL, <code>pvfs://my-s3-connection/my-sub-folder/my-file</code>, would convert to the
* provider URL, <code>s3://my-bucket/my-folder/my-sub-folder/my-file</code>.
* <p>
* Given an HCP connection, with a configured root folder path of <code>my-folder</code>, and a configured domain of
* <code>my-domain.com:3000</code>, the <code>pvfs</code> URL,
* <code>pvfs://my-hcp-connection/my-sub-folder/my-file</code>, would convert to the provider URL,
* <code>hcp://my-domain.com:3000/my-folder/my-sub-folder/my-file</code>.
*
* @return A non-empty root path, if any; {@code null}, otherwise.
*/
default String getRootPath() {
return null;
}

/**
* Sets the root folder path, given as a string.
* <p>
* An empty root folder path value should be converted to {@code null}.
* Further syntax validation is performed only after variable substitution.
* <p>
* The default interface implementation exists to ensure backward compatibility and does nothing.
*
* @param rootPath The root path.
*/
default void setRootPath( String rootPath ) { }

Expand Down
Loading

0 comments on commit 203e2b8

Please sign in to comment.