diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 535250d..c41d18b 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -9,7 +9,7 @@ name: Lint # If the linter fails, the PR can still be completed, but none of the linter changes will be made. -on: +on: push: branches: - main @@ -31,7 +31,7 @@ jobs: - name: Check out repository (push) if: ${{ github.event_name == 'push' }} uses: actions/checkout@v3 - + - name: Check out repository (pull_request_target) if: ${{ github.event_name == 'pull_request_target' }} uses: actions/checkout@v3 @@ -57,4 +57,3 @@ jobs: uses: reviewdog/action-suggester@v1 with: tool_name: lint - \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 098289b..b0391b6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,10 +2,10 @@ name: Test on: pull_request: - branches: + branches: - main push: - branches: + branches: - main env: @@ -29,7 +29,7 @@ jobs: - name: install deps run: | sudo apt update && sudo apt-get install -y cmake libcurl4-openssl-dev libcurl4 pkg-config libssl-dev libxrootd-dev libxrootd-server-dev libgtest-dev - + - name: Create Build Environment # Some projects don't allow in-source building, so create a separate build directory # We'll use this as our working directory for all subsequent commands @@ -40,8 +40,8 @@ jobs: # access regardless of the host operating system shell: bash working-directory: ${{runner.workspace}}/build - # Note the current convention is to use the -S and -B options here to specify source - # and build directories, but this is only available with CMake 3.13 and higher. + # Note the current convention is to use the -S and -B options here to specify source + # and build directories, but this is only available with CMake 3.13 and higher. 
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12 run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DXROOTD_PLUGINS_BUILD_UNITTESTS=yes -DXROOTD_PLUGINS_EXTERNAL_GTEST=${{ matrix.external-gtest }} @@ -54,6 +54,6 @@ jobs: - name: Test working-directory: ${{runner.workspace}}/build shell: bash - # Execute tests defined by the CMake configuration. + # Execute tests defined by the CMake configuration. # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail run: ctest -C $BUILD_TYPE --verbose diff --git a/.gitmodules b/.gitmodules index 76e2bbf..0c2d89b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "vendor/gtest"] path = vendor/gtest url = https://github.com/google/googletest.git + +[submodule "vendor/tinyxml2"] + path = vendor/tinyxml2 + url = https://github.com/leethomason/tinyxml2.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3d80c59 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.6 + hooks: + - id: clang-format diff --git a/CMakeLists.txt b/CMakeLists.txt index b29f2e9..2f5ea1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ endmacro(use_cxx17) use_cxx17() if( CMAKE_COMPILER_IS_GNUCXX ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror" ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror" ) endif() if(NOT APPLE) @@ -47,19 +47,26 @@ if(NOT APPLE) SET( CMAKE_MODULE_LINKER_FLAGS "-Wl,--no-undefined") endif() +if( NOT XROOTD_EXTERNAL_TINYXML2 ) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_subdirectory(vendor/tinyxml2) +else() + find_package(tinyxml2::tinyxml2) +endif() + include_directories(${XROOTD_INCLUDES} 
${CURL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS}) -add_library(XrdS3 SHARED src/S3File.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdS3 SHARED src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) add_library(XrdHTTPServer SHARED src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) -target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES}) +target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES} tinyxml2::tinyxml2) target_link_libraries(XrdHTTPServer -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES}) # The CMake documentation strongly advises against using these macros; instead, the pkg_check_modules # is supposed to fill out the full path to ${LIBCRYPTO_LIBRARIES}. As of cmake 3.26.1, this does not # appear to be the case on Mac OS X. Remove once no longer necessary! 
-target_link_directories(XrdS3 PRIVATE ${LIBCRYPTO_LIBRARY_DIRS}) -target_link_directories(XrdHTTPServer PRIVATE ${LIBCRYPTO_LIBRARY_DIRS}) +target_link_directories(XrdS3 PUBLIC ${LIBCRYPTO_LIBRARY_DIRS}) +target_link_directories(XrdHTTPServer PUBLIC ${LIBCRYPTO_LIBRARY_DIRS}) if(NOT APPLE) set_target_properties(XrdS3 PROPERTIES OUTPUT_NAME "XrdS3-${XROOTD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols") @@ -77,16 +84,17 @@ install( ) if( XROOTD_PLUGINS_BUILD_UNITTESTS ) - if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) -include(ExternalProject) -ExternalProject_Add(gtest - PREFIX external/gtest - URL ${CMAKE_CURRENT_SOURCE_DIR}/vendor/gtest - INSTALL_COMMAND : -) -endif() -enable_testing() -add_subdirectory(test) + if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) + include(ExternalProject) + ExternalProject_Add(gtest + PREFIX external/gtest + URL ${CMAKE_CURRENT_SOURCE_DIR}/vendor/gtest + BUILD_BYPRODUCTS external/gtest/src/gtest-build/lib/libgtest.a + INSTALL_COMMAND : + ) + endif() + enable_testing() + add_subdirectory(test) endif() #install( diff --git a/README.md b/README.md index c4ff82c..6df385d 100644 --- a/README.md +++ b/README.md @@ -161,4 +161,4 @@ In a separate terminal, run ``` curl -v http://localhost:1094// -``` \ No newline at end of file +``` diff --git a/rpm/xrootd-s3-http.spec b/rpm/xrootd-s3-http.spec index 297dac1..092ced9 100644 --- a/rpm/xrootd-s3-http.spec +++ b/rpm/xrootd-s3-http.spec @@ -52,4 +52,3 @@ make install DESTDIR=$RPM_BUILD_ROOT * Tue Dec 06 2022 Brian Bockelman - 0.0.1-1 - Initial, "Hello world" version of the S3 filesystem plugin - diff --git a/src/AWSv4-impl.cc b/src/AWSv4-impl.cc index 26f306b..1306742 100644 --- a/src/AWSv4-impl.cc +++ b/src/AWSv4-impl.cc @@ -201,7 +201,9 @@ std::string canonicalizeQueryString( } // We'll always have a superflous trailing ampersand. 
- canonicalQueryString.erase(canonicalQueryString.end() - 1); + if (!canonicalQueryString.empty()) { + canonicalQueryString.erase(canonicalQueryString.end() - 1); + } return canonicalQueryString; } diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index 2b3fa51..999ca8f 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -67,8 +67,6 @@ HTTPRequest::~HTTPRequest() {} if (rv##B != CURLE_OK) { \ this->errorCode = "E_CURL_LIB"; \ this->errorMessage = "curl_easy_setopt( " #B " ) failed."; \ - /* dprintf( D_ALWAYS, "curl_easy_setopt( %s ) failed (%d): '%s', \ - failing.\n", #B, rv##B, curl_easy_strerror( rv##B ) ); */ \ return false; \ } \ } @@ -168,6 +166,8 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, const std::string &uri, const std::string &payload) { + m_log.Log(XrdHTTPServer::Debug, "SendRequest", "Sending HTTP request", + uri.c_str()); CURLcode rv = curl_global_init(CURL_GLOBAL_ALL); if (rv != 0) { this->errorCode = "E_CURL_LIB"; @@ -317,20 +317,6 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, CAFile = x509_ca_file; } - if (CAPath.empty()) { - char *soap_ssl_ca_dir = getenv("GAHP_SSL_CADIR"); - if (soap_ssl_ca_dir != NULL) { - CAPath = soap_ssl_ca_dir; - } - } - - if (CAFile.empty()) { - char *soap_ssl_ca_file = getenv("GAHP_SSL_CAFILE"); - if (soap_ssl_ca_file != NULL) { - CAFile = soap_ssl_ca_file; - } - } - if (!CAPath.empty()) { SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_CAPATH, CAPath.c_str()); } diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index aec5966..cc78c33 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -47,6 +47,8 @@ class HTTPRequest { virtual bool SendHTTPRequest(const std::string &payload); unsigned long getResponseCode() const { return responseCode; } + const std::string &getErrorCode() const { return errorCode; } + const std::string &getErrorMessage() const { return errorMessage; } const std::string &getResultString() const { return resultString; } // 
Currently only used in PUTS, but potentially useful elsewhere @@ -74,7 +76,7 @@ class HTTPRequest { std::string errorCode; std::string resultString; - unsigned long responseCode; + unsigned long responseCode{0}; unsigned long expectedResponseCode = 200; bool includeResponseHeader; diff --git a/src/S3AccessInfo.cc b/src/S3AccessInfo.cc index 8664833..d0351a1 100644 --- a/src/S3AccessInfo.cc +++ b/src/S3AccessInfo.cc @@ -49,3 +49,9 @@ const std::string &S3AccessInfo::getS3SecretKeyFile() const { void S3AccessInfo::setS3SecretKeyFile(const std::string &s3SecretKeyFile) { s3_secret_key_file = s3SecretKeyFile; } + +const std::string &S3AccessInfo::getS3UrlStyle() const { return s3_url_style; } + +void S3AccessInfo::setS3UrlStyle(const std::string &s3UrlStyle) { + s3_url_style = s3UrlStyle; +} diff --git a/src/S3AccessInfo.hh b/src/S3AccessInfo.hh index 2770ee0..cf73eec 100644 --- a/src/S3AccessInfo.hh +++ b/src/S3AccessInfo.hh @@ -2,8 +2,7 @@ // Created by Rich Wellner on 2/29/24. // -#ifndef XROOTD_S3_HTTP_S3ACCESSINFO_HH -#define XROOTD_S3_HTTP_S3ACCESSINFO_HH +#pragma once #include @@ -33,6 +32,12 @@ class S3AccessInfo { void setS3SecretKeyFile(const std::string &s3SecretKeyFile); + const std::string &getS3UrlStyle() const; + + void setS3UrlStyle(const std::string &s3UrlStyle); + + const int getS3SignatureVersion() const { return 4; } + private: std::string s3_bucket_name; std::string s3_service_name; @@ -40,6 +45,5 @@ class S3AccessInfo { std::string s3_service_url; std::string s3_access_key_file; std::string s3_secret_key_file; + std::string s3_url_style; }; - -#endif // XROOTD_S3_HTTP_S3ACCESSINFO_HH diff --git a/src/S3Commands.cc b/src/S3Commands.cc index dc12889..bb28928 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -44,8 +45,6 @@ bool AmazonRequest::SendRequest() { default: this->errorCode = "E_INTERNAL"; this->errorMessage = "Invalid signature version."; - // dprintf( 
D_ALWAYS, "Invalid signature version (%d), failing.\n", - // signatureVersion ); return false; } } @@ -57,45 +56,54 @@ std::string AmazonRequest::canonicalizeQueryString() { // Takes in the configured `s3.service_url` and uses the bucket/object requested // to generate the host URL, as well as the canonical URI (which is the path to // the object). -bool AmazonRequest::parseURL(const std::string &url, std::string &path) { - auto i = url.find("://"); - if (i == std::string::npos) { +bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, + std::string &path) { + auto schemeEndIdx = url.find("://"); + if (schemeEndIdx == std::string::npos) { return false; } + if (url.size() < schemeEndIdx + 3) { + return false; + } + auto hostStartIdx = schemeEndIdx + 3; - auto j = url.find("/", i + 3); - if (j == std::string::npos) { - if (style == "path") { + auto resourceStartIdx = url.find("/", hostStartIdx); + if (resourceStartIdx == std::string::npos) { + if (m_style == "path") { // If we're configured for path-style requests, then the host is // everything between // :// and the last / - host = substring(url, i + 3); + host = substring(url, hostStartIdx); // Likewise, the path is going to be /bucket/object - // Sometimes we intentionally configure the plugin with no bucket because we - // assume the incoming object request already encodes the bucket. This is used - // for exporting many buckets from a single endpoint. + // Sometimes we intentionally configure the plugin with no bucket + // because we assume the incoming object request already encodes the + // bucket. This is used for exporting many buckets from a single + // endpoint. 
if (bucket.empty()) { path = "/" + object; + bucket_path = "/" + object.substr(0, object.find('/')); } else { path = "/" + bucket + "/" + object; + bucket_path = "/" + bucket; } } else { // In virtual-style requests, the host should be determined as // everything between // :// up until the last /, but with appended to the front. - host = bucket + "." + substring(url, i + 3); + host = bucket + "." + substring(url, hostStartIdx); path = "/" + object; + bucket_path = "/"; } return true; } - if (style == "path") { - host = substring(url, i + 3, j); - path = substring(url, j) + "/" + bucket + "/" + object; + if (m_style == "path") { + host = substring(url, hostStartIdx, resourceStartIdx); + path = substring(url, resourceStartIdx) + "/" + bucket + "/" + object; } else { - host = bucket + "." + substring(url, i + 3, j); - path = substring(url, j) + object; + host = bucket + "." + substring(url, hostStartIdx, resourceStartIdx); + path = substring(url, resourceStartIdx) + object; } return true; @@ -136,6 +144,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload, } trim(saKey); } else { + canonicalQueryString = canonicalizeQueryString(); + requiresSignature = false; // If we don't create a signature, it must not be needed... return true; // If there was no saKey, we need not generate a signature @@ -176,14 +186,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload, canonicalURI = pathEncode(canonicalURI); // The canonical query string is the alphabetically sorted list of - // URI-encoded parameter names '=' values, separated by '&'s. That - // wouldn't be hard to do, but we don't need to, since we send - // everything in the POST body, instead. - std::string canonicalQueryString; - - // This function doesn't (currently) support query parameters, - // but no current caller attempts to use them. - assert((httpVerb != "GET") || query_parameters.size() == 0); + // URI-encoded parameter names '=' values, separated by '&'s. 
+ canonicalQueryString = canonicalizeQueryString(); // The canonical headers must include the Host header, so add that // now if we don't have it. @@ -209,7 +213,6 @@ bool AmazonRequest::createV4Signature(const std::string &payload, if (!doSha256(payload, messageDigest, &mdLength)) { this->errorCode = "E_INTERNAL"; this->errorMessage = "Unable to hash payload."; - // dprintf( D_ALWAYS, "Unable to hash payload, failing.\n" ); return false; } std::string payloadHash; @@ -396,10 +399,6 @@ bool AmazonRequest::sendV4Request(const std::string &payload, return false; } - if (!sendContentSHA) { - // dprintf( D_FULLDEBUG, "Payload is '%s'\n", payload.c_str() ); - } - std::string authorizationValue; if (!createV4Signature(payload, authorizationValue, sendContentSHA)) { if (this->errorCode.empty()) { @@ -417,7 +416,12 @@ bool AmazonRequest::sendV4Request(const std::string &payload, headers["Authorization"] = authorizationValue; } - return sendPreparedRequest(protocol, hostUrl, payload); + // This operation is on the bucket itself; alter the URL + auto url = hostUrl; + if (!canonicalQueryString.empty()) { + url += "?" 
+ canonicalQueryString; + } + return sendPreparedRequest(protocol, url, payload); } // It's stated in the API documentation that you can upload to any region @@ -484,3 +488,143 @@ bool AmazonS3Head::SendRequest() { } // --------------------------------------------------------------------------- + +bool AmazonS3List::SendRequest(const std::string &continuationToken) { + query_parameters["list-type"] = "2"; // Version 2 of the object-listing API + query_parameters["delimiter"] = "/"; + query_parameters["prefix"] = urlquote(object); + if (!continuationToken.empty()) { + query_parameters["continuation-token"] = urlquote(continuationToken); + } + query_parameters["max-keys"] = std::to_string(m_maxKeys); + httpVerb = "GET"; + + // Operation is on the bucket itself; alter the URL to remove the object + hostUrl = protocol + "://" + host + bucketPath; + + return SendS3Request(""); +} + +// Parse the results of the AWS directory listing +// +// S3 returns an XML structure for directory listings so we must pick it apart +// and convert it to `objInfo` and `commonPrefixes`. The `objInfo` is a list of +// objects that match the current prefix but don't have a subsequent `/` in the +// object name. The `commonPrefixes` are the unique prefixes of other objects +// that have the same prefix as the original query but also have an `/`. +// +// Example. Suppose we have the following objects in the bucket: +// - /foo/bar.txt +// - /foo/bar/example.txt +// - /foo/baz/example.txt +// Then, a query to list with prefix `/foo/` would return object info for +// `/foo/bar.txt` while the common prefixes would be `/foo/bar/` and `/foo/baz`. +// Note this is quite close to returning a list of files in a directory and a +// list of sub-directories. 
+bool AmazonS3List::Results(std::vector &objInfo, + std::vector &commonPrefixes, + std::string &ct, std::string &errMsg) { + tinyxml2::XMLDocument doc; + auto err = doc.Parse(resultString.c_str()); + if (err != tinyxml2::XML_SUCCESS) { + errMsg = doc.ErrorStr(); + return false; + } + + auto elem = doc.RootElement(); + if (strcmp(elem->Name(), "ListBucketResult")) { + errMsg = "S3 ListBucket response is not rooted with ListBucketResult " + "element"; + return false; + } + + // Example response from S3: + // + // + // genome-browser + // cells/muscle-ibm/endothelial-stromal-cells + // 40 + // 40 + // 1PnsptbFFpBSb6UBNN4F/RrxtBvIHjNpdXNYlX8E7IyqXRK26w2y36KViUAbyPPsjzikVY0Zj4jMvQHRhsGWZbcKKrEVvaR0HaZDtfUXUwnc= + // false + // + // cells/muscle-ibm/endothelial-stromal-cells/UMAP.coords.tsv.gz + // 2023-08-21T11:02:53.000Z + // "b9b0065f10cbd91c9d341acc235c63b0" + // 360012 + // STANDARD + // + // + // cells/muscle-ibm/endothelial-stromal-cells/barcodes.tsv.gz + // 2023-07-17T11:02:19.000Z + // "048feef5d340e2dd4d2d2d495c24ad7e" + // 118061 + // STANDARD + // + // ... (truncated some entries for readability) ... 
+ // + // cells/muscle-ibm/endothelial-stromal-cells/coords/ + // + // + // cells/muscle-ibm/endothelial-stromal-cells/markers/ + // + // + // cells/muscle-ibm/endothelial-stromal-cells/metaFields/ + // + // + bool isTruncated = false; + for (auto child = elem->FirstChildElement(); child != nullptr; + child = child->NextSiblingElement()) { + if (!strcmp(child->Name(), "IsTruncated")) { + bool isTrunc; + if (child->QueryBoolText(&isTrunc) == tinyxml2::XML_SUCCESS) { + isTruncated = isTrunc; + } + } else if (!strcmp(child->Name(), "CommonPrefixes")) { + auto prefix = child->FirstChildElement("Prefix"); + if (prefix != nullptr) { + auto prefixChar = prefix->GetText(); + if (prefixChar != nullptr) { + auto prefixStr = std::string(prefixChar); + trim(prefixStr); + if (!prefixStr.empty()) { + commonPrefixes.emplace_back(prefixStr); + } + } + } + } else if (!strcmp(child->Name(), "Contents")) { + std::string keyStr; + int64_t size; + bool goodSize = false; + auto key = child->FirstChildElement("Key"); + if (key != nullptr) { + auto keyChar = key->GetText(); + if (keyChar != nullptr) { + keyStr = std::string(keyChar); + trim(keyStr); + } + } + auto sizeElem = child->FirstChildElement("Size"); + if (sizeElem != nullptr) { + goodSize = + (sizeElem->QueryInt64Text(&size) == tinyxml2::XML_SUCCESS); + } + if (goodSize && !keyStr.empty()) { + S3ObjectInfo obj; + obj.m_key = keyStr; + obj.m_size = size; + objInfo.emplace_back(obj); + } + } else if (!strcmp(child->Name(), "NextContinuationToken")) { + auto ctChar = child->GetText(); + if (ctChar) { + ct = ctChar; + trim(ct); + } + } + } + if (!isTruncated) { + ct = ""; + } + return true; +} diff --git a/src/S3Commands.hh b/src/S3Commands.hh index dee72ee..6d4eddc 100644 --- a/src/S3Commands.hh +++ b/src/S3Commands.hh @@ -19,17 +19,26 @@ #pragma once #include "HTTPCommands.hh" +#include "S3AccessInfo.hh" #include +#include class AmazonRequest : public HTTPRequest { public: + AmazonRequest(const S3AccessInfo &ai, const std::string 
objectName, + XrdSysError &log) + : AmazonRequest(ai.getS3ServiceUrl(), ai.getS3AccessKeyFile(), + ai.getS3SecretKeyFile(), ai.getS3BucketName(), + objectName, ai.getS3UrlStyle(), + ai.getS3SignatureVersion(), log) {} + AmazonRequest(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, int sv, XrdSysError &log) : HTTPRequest(s, log), accessKeyFile(akf), secretKeyFile(skf), - signatureVersion(sv), bucket(b), object(o), style(style) { + signatureVersion(sv), bucket(b), object(o), m_style(style) { requiresSignature = true; // Start off by parsing the hostUrl, which we use in conjunction with // the bucket to fill in the host (for setting host header). For @@ -37,7 +46,7 @@ class AmazonRequest : public HTTPRequest { // "https://my-url.com:443", the bucket is "my-bucket", and the object // is "my-object", then the host will be "my-bucket.my-url.com:443" and // the canonicalURI will be "/my-object". - if (!parseURL(hostUrl, canonicalURI)) { + if (!parseURL(hostUrl, bucketPath, canonicalURI)) { errorCode = "E_INVALID_SERVICE_URL"; errorMessage = "Failed to parse host and canonicalURI from service URL."; @@ -67,7 +76,8 @@ class AmazonRequest : public HTTPRequest { virtual const std::string *getAccessKey() const { return &accessKeyFile; } virtual const std::string *getSecretKey() const { return &secretKeyFile; } - bool parseURL(const std::string &url, std::string &path); + bool parseURL(const std::string &url, std::string &bucket_path, + std::string &path); virtual bool SendRequest(); virtual bool SendS3Request(const std::string &payload); @@ -82,6 +92,9 @@ class AmazonRequest : public HTTPRequest { std::string host; std::string canonicalURI; + std::string bucketPath; // Path to use for bucket-level operations (such as + // listings). 
May be empty for DNS-style buckets + std::string canonicalQueryString; std::string bucket; std::string object; @@ -89,7 +102,7 @@ class AmazonRequest : public HTTPRequest { std::string region; std::string service; - std::string style; + std::string m_style; private: bool createV4Signature(const std::string &payload, @@ -103,6 +116,10 @@ class AmazonS3Upload : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, + XrdSysError &log) + : AmazonRequest(ai, objectName, log) {} + AmazonS3Upload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -122,6 +139,10 @@ class AmazonS3Download : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, + XrdSysError &log) + : AmazonRequest(ai, objectName, log) {} + AmazonS3Download(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -137,6 +158,10 @@ class AmazonS3Head : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, + XrdSysError &log) + : AmazonRequest(ai, objectName, log) {} + AmazonS3Head(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -147,3 +172,27 @@ class AmazonS3Head : public AmazonRequest { virtual bool SendRequest(); }; + +struct S3ObjectInfo { + size_t m_size; + std::string m_key; +}; + +class AmazonS3List : public AmazonRequest { + using AmazonRequest::SendRequest; + + public: + AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, + size_t maxKeys, XrdSysError &log) + : AmazonRequest(ai, objectName, log), m_maxKeys(maxKeys) {} + + virtual ~AmazonS3List() {} + + bool 
SendRequest(const std::string &continuationToken); + bool Results(std::vector &objInfo, + std::vector &commonPrefixes, std::string &ct, + std::string &errMsg); + + private: + size_t m_maxKeys{1000}; +}; diff --git a/src/S3Directory.cc b/src/S3Directory.cc new file mode 100644 index 0000000..31375b5 --- /dev/null +++ b/src/S3Directory.cc @@ -0,0 +1,249 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************/ + +#include "S3Directory.hh" +#include "S3Commands.hh" +#include "logging.hh" +#include "stl_string_utils.hh" + +#include + +#include + +#include + +void S3Directory::Reset() { + m_opened = false; + m_ct = ""; + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + m_stat_buf = nullptr; + m_ai = S3AccessInfo(); + m_object = ""; +} + +int S3Directory::ListS3Dir(const std::string &ct) { + AmazonS3List listCommand(m_ai, m_object, 1000, m_log); + auto res = listCommand.SendRequest(ct); + if (!res) { + switch (listCommand.getResponseCode()) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } + } + std::string errMsg; + m_idx = 0; + res = listCommand.Results(m_objInfo, m_commonPrefixes, m_ct, errMsg); + if (!res) { + m_log.Log(XrdHTTPServer::Warning, "Opendir", + "Failed to parse S3 results:", errMsg.c_str()); + return -EIO; + } + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + std::stringstream ss; + ss << "Directory listing returned " << m_objInfo.size() + << " objects and " << m_commonPrefixes.size() << " prefixes"; + m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); + } + + m_opened = true; + return 0; +} + +int S3Directory::Opendir(const char *path, XrdOucEnv &env) { + if (m_opened) { + return -EBADF; + } + Reset(); + + std::string exposedPath, object; + int rv = m_fs.parsePath(path, exposedPath, object); + if (rv != 0) { + return rv; + } + + auto ai = m_fs.getS3AccessInfo(exposedPath, object); + if (!ai) { + return -ENOENT; + } + + if (ai->getS3BucketName().empty()) { + return -EINVAL; + } + + m_ai = *ai; + // If the prefix is "foo" and there's an object "foo/bar", then + // the lookup only returns "foo/" (as it's the longest common prefix prior + // to a delimiter). Instead, we want to query for "foo/", which returns + // "foo/bar". 
+ if (!object.empty() && (object[object.size() - 1] != '/')) { + object += "/"; + } + m_object = object; + + return ListS3Dir(""); +} + +int S3Directory::Readdir(char *buff, int blen) { + if (!m_opened) { + return -EBADF; + } + + if (m_stat_buf) { + memset(m_stat_buf, '\0', sizeof(struct stat)); + } + + // m_idx encodes the location inside the current directory. + // - m_idx in [0, m_objInfo.size) means return a "file" from the object + // list. + // - m_idx == m_objectInfo.size means return the first entry in the + // directory/common prefix list. + // - m_idx in (m_commonPrefixes.size, -1] means return an entry from the + // common prefix list. + // - m_idx == -m_commonPrefixes.size means that all the path elements have + // been consumed. + // + // If all the paths entry have been consumed, then if the continuation token + // is set, list more objects in the bucket. If it's unset, then we've + // iterated through all the bucket contents. + auto idx = m_idx; + if (m_objInfo.empty() && m_commonPrefixes.empty()) { + *buff = '\0'; + return XrdOssOK; + } else if (idx >= 0 && idx < static_cast(m_objInfo.size())) { + m_idx++; + std::string full_name = m_objInfo[idx].m_key; + auto lastSlashIdx = full_name.rfind("/"); + if (lastSlashIdx != std::string::npos) { + full_name.erase(0, lastSlashIdx); + } + trimslashes(full_name); + strncpy(buff, full_name.c_str(), blen); + if (buff[blen - 1] != '\0') { + buff[blen - 1] = '\0'; + return -ENOMEM; + } + if (m_stat_buf) { + m_stat_buf->st_mode = 0x0600 | S_IFREG; + m_stat_buf->st_nlink = 1; + m_stat_buf->st_size = m_objInfo[idx].m_size; + } + } else if (idx < 0 && + -idx == static_cast(m_commonPrefixes.size())) { + if (!m_ct.empty()) { + // Get the next set of results from S3. 
+ m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + if (m_stat_buf) { + memset(m_stat_buf, '\0', sizeof(struct stat)); + } + auto rv = ListS3Dir(m_ct); + if (rv != 0) { + m_opened = false; + return rv; + } + // Recurse to parse the fresh results. + return Readdir(buff, blen); + } + *buff = '\0'; + return XrdOssOK; + } else if (idx == static_cast(m_objInfo.size()) || + -idx < static_cast(m_commonPrefixes.size())) { + if (m_commonPrefixes.empty()) { + if (!m_ct.empty()) { + // Get the next set of results from S3. + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + if (m_stat_buf) { + memset(m_stat_buf, '\0', sizeof(struct stat)); + } + auto rv = ListS3Dir(m_ct); + if (rv != 0) { + m_opened = false; + return rv; + } + // Recurse to parse the fresh results. + return Readdir(buff, blen); + } + *buff = '\0'; + return XrdOssOK; + } + if (idx == static_cast(m_objInfo.size())) { + m_idx = -1; + idx = 0; + } else { + idx = -m_idx; + m_idx--; + } + std::string full_name = m_commonPrefixes[idx]; + trimslashes(full_name); + auto lastSlashIdx = full_name.rfind("/"); + if (lastSlashIdx != std::string::npos) { + full_name.erase(0, lastSlashIdx); + } + trimslashes(full_name); + strncpy(buff, full_name.c_str(), blen); + if (buff[blen - 1] != '\0') { + buff[blen - 1] = '\0'; + return -ENOMEM; + } + if (m_stat_buf) { + m_stat_buf->st_mode = 0x0700 | S_IFDIR; + m_stat_buf->st_nlink = 0; + m_stat_buf->st_size = 4096; + } + } else { + return -EBADF; + } + + if (m_stat_buf) { + m_stat_buf->st_uid = 1; + m_stat_buf->st_gid = 1; + m_stat_buf->st_mtime = m_stat_buf->st_ctime = m_stat_buf->st_atime = 0; + m_stat_buf->st_dev = 0; + m_stat_buf->st_ino = 1; // If both st_dev and st_ino are 0, then XRootD + // interprets that as an unavailable file. 
+ } + return XrdOssOK; +} + +int S3Directory::StatRet(struct stat *buf) { + if (!m_opened) { + return -EBADF; + } + + m_stat_buf = buf; + return XrdOssOK; +} + +int S3Directory::Close(long long *retsz) { + if (!m_opened) { + return -EBADF; + } + Reset(); + return XrdOssOK; +} diff --git a/src/S3Directory.hh b/src/S3Directory.hh index 3287eda..b15207d 100644 --- a/src/S3Directory.hh +++ b/src/S3Directory.hh @@ -19,27 +19,41 @@ #pragma once #include "HTTPDirectory.hh" +#include "S3Commands.hh" +#include "S3FileSystem.hh" -// Leaving in duplicate definitions for now. It remains -// to be seen if we'll need to change these and have specific -// behaviors for either HTTP or S3 variants in the future. +#include +#include + +class XrdSysError; class S3Directory : public HTTPDirectory { public: - S3Directory(XrdSysError &log) - : HTTPDirectory(log) - // m_log(log) - {} + S3Directory(XrdSysError &log, const S3FileSystem &fs) + : HTTPDirectory(log), m_fs(fs) {} virtual ~S3Directory() {} - virtual int Opendir(const char *path, XrdOucEnv &env) override { - return -ENOSYS; - } + virtual int Opendir(const char *path, XrdOucEnv &env) override; + + int Readdir(char *buff, int blen) override; + + int StatRet(struct stat *statStruct) override; - int Readdir(char *buff, int blen) override { return -ENOSYS; } + int Close(long long *retsz = 0) override; - int StatRet(struct stat *statStruct) override { return -ENOSYS; } + private: + void Reset(); + int ListS3Dir(const std::string &ct); - int Close(long long *retsz = 0) override { return -ENOSYS; } + bool m_opened{false}; + ssize_t m_idx{0}; + std::vector m_objInfo; + std::vector m_commonPrefixes; + std::string m_prefix; + std::string m_ct; + std::string m_object; + const S3FileSystem &m_fs; + S3AccessInfo m_ai; + struct stat *m_stat_buf{nullptr}; }; diff --git a/src/S3File.cc b/src/S3File.cc index 6b7ffce..e90e5ca 100644 --- a/src/S3File.cc +++ b/src/S3File.cc @@ -48,81 +48,31 @@ XrdVERSIONINFO(XrdOssGetFileSystem, S3); 
S3File::S3File(XrdSysError &log, S3FileSystem *oss) : m_log(log), m_oss(oss), content_length(0), last_modified(0) {} -int parse_path(const S3FileSystem &fs, const char *fullPath, - std::string &exposedPath, std::string &object) { - // - // Check the path for validity. - // - std::filesystem::path p(fullPath); - auto pathComponents = p.begin(); - - // Iterate through components of the fullPath until we either find a match - // or we've reached the end of the path. - std::filesystem::path currentPath = *pathComponents; - while (pathComponents != p.end()) { - if (fs.exposedPathExists(currentPath.string())) { - exposedPath = currentPath.string(); - break; - } - ++pathComponents; - if (pathComponents != p.end()) { - currentPath /= *pathComponents; - } else { - return -ENOENT; - } - } - - // Objects names may contain path separators. - ++pathComponents; - if (pathComponents == p.end()) { - return -ENOENT; - } - - std::filesystem::path objectPath = *pathComponents++; - for (; pathComponents != p.end(); ++pathComponents) { - objectPath /= (*pathComponents); +int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + m_log.Log(LogMask::Warning, "S3File::Open", "Opening file", path); } - object = objectPath.string(); - - fprintf(stderr, "object = %s\n", object.c_str()); - - return 0; -} -int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { std::string exposedPath, object; - int rv = parse_path(*m_oss, path, exposedPath, object); + auto rv = m_oss->parsePath(path, exposedPath, object); if (rv != 0) { return rv; } - if (!m_oss->exposedPathExists(exposedPath)) + auto ai = m_oss->getS3AccessInfo(exposedPath, object); + if (!ai) { return -ENOENT; + } + if (ai->getS3BucketName().empty()) { + return -EINVAL; + } - std::string configured_s3_region = m_oss->getS3Region(exposedPath); - std::string configured_s3_service_url = m_oss->getS3ServiceURL(exposedPath); - std::string 
configured_s3_access_key = - m_oss->getS3AccessKeyFile(exposedPath); - std::string configured_s3_secret_key = - m_oss->getS3SecretKeyFile(exposedPath); - std::string configured_s3_bucket_name = m_oss->getS3BucketName(exposedPath); - std::string configured_s3_url_style = m_oss->getS3URLStyle(); - - // We used to query S3 here to see if the object existed, but of course - // if you're creating a file on upload, you don't care. - - this->s3_object_name = object; - this->s3_bucket_name = configured_s3_bucket_name; - this->s3_service_url = configured_s3_service_url; - this->s3_access_key = configured_s3_access_key; - this->s3_secret_key = configured_s3_secret_key; - this->s3_url_style = configured_s3_url_style; + m_ai = *ai; + m_object = object; // This flag is not set when it's going to be a read operation // so we check if the file exists in order to be able to return a 404 if (!Oflag) { - AmazonS3Head head(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Head head(m_ai, m_object, m_log); if (!head.SendRequest()) { return -ENOENT; @@ -133,9 +83,7 @@ int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { } ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { - AmazonS3Download download(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Download download(m_ai, m_object, m_log); if (!download.SendRequest(offset, size)) { std::stringstream ss; @@ -151,9 +99,7 @@ ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { } int S3File::Fstat(struct stat *buff) { - AmazonS3Head head(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Head head(m_ai, m_object, m_log); if (!head.SendRequest()) { // SendRequest() returns false for all errors, 
including ones @@ -218,9 +164,7 @@ int S3File::Fstat(struct stat *buff) { } ssize_t S3File::Write(const void *buffer, off_t offset, size_t size) { - AmazonS3Upload upload(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Upload upload(m_ai, m_object, m_log); std::string payload((char *)buffer, size); if (!upload.SendRequest(payload, offset, size)) { diff --git a/src/S3File.hh b/src/S3File.hh index 33a3244..e42fcb1 100644 --- a/src/S3File.hh +++ b/src/S3File.hh @@ -98,12 +98,8 @@ class S3File : public XrdOssDF { XrdSysError &m_log; S3FileSystem *m_oss; - std::string s3_service_url; - std::string s3_bucket_name; - std::string s3_object_name; - std::string s3_access_key; - std::string s3_secret_key; - std::string s3_url_style; + std::string m_object; + S3AccessInfo m_ai; size_t content_length; time_t last_modified; diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 4c5668d..e78f501 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -20,6 +20,7 @@ #include "S3AccessInfo.hh" #include "S3Directory.hh" #include "S3File.hh" +#include "logging.hh" #include "shortfile.hh" #include "stl_string_utils.hh" @@ -28,7 +29,9 @@ #include #include +#include #include +#include #include #include @@ -73,13 +76,13 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { std::string value; std::string attribute; Config.Attach(cfgFD); - S3AccessInfo *newAccessInfo = new S3AccessInfo(); + std::shared_ptr newAccessInfo(new S3AccessInfo()); std::string exposedPath; while ((temporary = Config.GetMyFirstWord())) { attribute = temporary; temporary = Config.GetWord(); if (attribute == "s3.end") { - s3_access_map[exposedPath] = newAccessInfo; + m_s3_access_map[exposedPath] = newAccessInfo; if (newAccessInfo->getS3ServiceName().empty()) { m_log.Emsg("Config", "s3.service_name not specified"); return false; @@ -103,7 +106,7 @@ bool S3FileSystem::Config(XrdSysLogger 
*lp, const char *configfn) { return false; } } - newAccessInfo = new S3AccessInfo(); + newAccessInfo.reset(new S3AccessInfo()); exposedPath = ""; continue; } @@ -164,18 +167,20 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { newAccessInfo->setS3SecretKeyFile(value); else if (attribute == "s3.service_url") newAccessInfo->setS3ServiceUrl(value); - else if (attribute == "s3.url_style") - this->s3_url_style = value; + else if (attribute == "s3.url_style") { + s3_url_style = value; + newAccessInfo->setS3UrlStyle(s3_url_style); + } } - if (this->s3_url_style.empty()) { + if (s3_url_style.empty()) { m_log.Emsg("Config", "s3.url_style not specified"); return false; } else { // We want this to be case-insensitive. - toLower(this->s3_url_style); + toLower(s3_url_style); } - if (this->s3_url_style != "virtual" && this->s3_url_style != "path") { + if (s3_url_style != "virtual" && s3_url_style != "path") { m_log.Emsg( "Config", "invalid s3.url_style specified. Must be 'virtual' or 'path'"); @@ -196,7 +201,7 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { // Object Allocation Functions // XrdOssDF *S3FileSystem::newDir(const char *user) { - return new S3Directory(m_log); + return new S3Directory(m_log, *this); } XrdOssDF *S3FileSystem::newFile(const char *user) { @@ -205,23 +210,126 @@ XrdOssDF *S3FileSystem::newFile(const char *user) { int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { - std::string error; + m_log.Log(XrdHTTPServer::Debug, "Stat", "Stat'ing path", path); - m_log.Emsg("Stat", "Stat'ing path", path); + std::string exposedPath, object; + auto rv = parsePath(path, exposedPath, object); + if (rv != 0) { + return rv; + } + auto ai = getS3AccessInfo(exposedPath, object); + if (!ai) { + m_log.Log(XrdHTTPServer::Info, "Stat", + "Prefix not configured for Stat"); + return -ENOENT; + } + if (ai->getS3BucketName().empty()) { + return -EINVAL; + } - S3File s3file(m_log, this); - int rv = 
s3file.Open(path, 0, (mode_t)0, *env); - if (rv) { - m_log.Emsg("Stat", "Failed to open path:", path); + trimslashes(object); + AmazonS3List listCommand(*ai, object, 1, m_log); + auto res = listCommand.SendRequest(""); + if (!res) { + auto httpCode = listCommand.getResponseCode(); + if (httpCode == 0) { + if (m_log.getMsgMask() & XrdHTTPServer::Info) { + std::stringstream ss; + ss << "Failed to stat path " << path + << "; error: " << listCommand.getErrorMessage() + << " (code=" << listCommand.getErrorCode() << ")"; + m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); + } + return -EIO; + } else { + if (m_log.getMsgMask() & XrdHTTPServer::Info) { + std::stringstream ss; + ss << "Failed to stat path " << path << "; response code " + << httpCode; + m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); + } + switch (httpCode) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } + } } - // Assume that S3File::FStat() doesn't write to buff unless it succeeds. 
- rv = s3file.Fstat(buff); - if (rv != 0) { - formatstr(error, "File %s not found.", path); - m_log.Emsg("Stat", error.c_str()); + + std::string errMsg; + std::vector objInfo; + std::vector commonPrefixes; + std::string ct; + res = listCommand.Results(objInfo, commonPrefixes, ct, errMsg); + if (!res) { + m_log.Log(XrdHTTPServer::Warning, "Stat", + "Failed to parse S3 results:", errMsg.c_str()); + return -EIO; + } + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + std::stringstream ss; + ss << "Stat on object returned " << objInfo.size() << " objects and " + << commonPrefixes.size() << " prefixes"; + m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); + } + + if (object.empty()) { + buff->st_mode = 0700 | S_IFDIR; + buff->st_nlink = 0; + buff->st_uid = 1; + buff->st_gid = 1; + buff->st_size = 4096; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; + return 0; + } + + bool foundObj = false; + size_t objSize = 0; + for (const auto &obj : objInfo) { + if (obj.m_key == object) { + foundObj = true; + objSize = obj.m_size; + break; + } + } + if (foundObj) { + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; + buff->st_uid = buff->st_gid = 1; + buff->st_size = objSize; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; + return 0; + } + + auto desiredPrefix = object + "/"; + bool foundPrefix = false; + for (const auto &prefix : commonPrefixes) { + if (prefix == desiredPrefix) { + foundPrefix = true; + break; + } + } + if (!foundPrefix) { return -ENOENT; } + buff->st_mode = 0700 | S_IFDIR; + buff->st_nlink = 0; + buff->st_uid = 1; + buff->st_gid = 1; + buff->st_size = 4096; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; return 0; } @@ -229,7 +337,7 @@ int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts) { // Is path valid? 
std::string exposedPath, object; - int rv = parse_path(*this, path, exposedPath, object); + int rv = parsePath(path, exposedPath, object); if (rv != 0) { return rv; } @@ -243,3 +351,66 @@ int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, return 0; } + +int S3FileSystem::parsePath(const char *fullPath, std::string &exposedPath, + std::string &object) const { + // + // Check the path for validity. + // + std::filesystem::path p(fullPath); + auto pathComponents = p.begin(); + + // Iterate through components of the fullPath until we either find a match + // or we've reached the end of the path. + std::filesystem::path currentPath = *pathComponents; + while (pathComponents != p.end()) { + if (exposedPathExists(currentPath.string())) { + exposedPath = currentPath.string(); + break; + } + ++pathComponents; + if (pathComponents != p.end()) { + currentPath /= *pathComponents; + } else { + return -ENOENT; + } + } + + // Objects names may contain path separators. + ++pathComponents; + if (pathComponents == p.end()) { + return -ENOENT; + } + + std::filesystem::path objectPath = *pathComponents++; + for (; pathComponents != p.end(); ++pathComponents) { + objectPath /= (*pathComponents); + } + object = objectPath.string(); + + return 0; +} + +const std::shared_ptr +S3FileSystem::getS3AccessInfo(const std::string &exposedPath, + std::string &object) const { + auto ai = m_s3_access_map.at(exposedPath); + if (!ai) { + return ai; + } + if (ai->getS3BucketName().empty()) { + // Bucket name is embedded in the "object" name. Split it into the + // bucket and "real" object. 
+ std::shared_ptr aiCopy(new S3AccessInfo(*ai)); + auto firstSlashIdx = object.find('/'); + if (firstSlashIdx == std::string::npos) { + aiCopy->setS3BucketName(object); + object = ""; + } else { + aiCopy->setS3BucketName(object.substr(0, firstSlashIdx)); + object = object.substr(firstSlashIdx + 1); + } + return aiCopy; + } + return ai; +} diff --git a/src/S3FileSystem.hh b/src/S3FileSystem.hh index 5cdee6a..95c639e 100644 --- a/src/S3FileSystem.hh +++ b/src/S3FileSystem.hh @@ -101,37 +101,48 @@ class S3FileSystem : public XrdOss { return nullptr; } + // Given a path as seen by XRootD, split it into the configured prefix and + // the object within the prefix. + // + // The returned `exposedPath` can be later used with the `get*` functions to + // fetch the required S3 configuration. + int parsePath(const char *fullPath, std::string &exposedPath, + std::string &object) const; + bool exposedPathExists(const std::string &exposedPath) const { - return s3_access_map.count(exposedPath) > 0; + return m_s3_access_map.count(exposedPath) > 0; } const std::string &getS3ServiceName(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3ServiceName(); + return m_s3_access_map.at(exposedPath)->getS3ServiceName(); } const std::string &getS3Region(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3Region(); + return m_s3_access_map.at(exposedPath)->getS3Region(); } const std::string &getS3ServiceURL(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3ServiceUrl(); + return m_s3_access_map.at(exposedPath)->getS3ServiceUrl(); } const std::string &getS3BucketName(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3BucketName(); + return m_s3_access_map.at(exposedPath)->getS3BucketName(); } const std::string & getS3AccessKeyFile(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3AccessKeyFile(); + return 
m_s3_access_map.at(exposedPath)->getS3AccessKeyFile(); } const std::string & getS3SecretKeyFile(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3SecretKeyFile(); + return m_s3_access_map.at(exposedPath)->getS3SecretKeyFile(); } const std::string &getS3URLStyle() const { return s3_url_style; } + const std::shared_ptr + getS3AccessInfo(const std::string &exposedPath, std::string &object) const; + private: XrdOucEnv *m_env; XrdSysError m_log; bool handle_required_config(const char *desired_name, const std::string &source); - std::map s3_access_map; + std::map> m_s3_access_map; std::string s3_url_style; }; diff --git a/src/shortfile.cc b/src/shortfile.cc index ce6b022..257c4ee 100644 --- a/src/shortfile.cc +++ b/src/shortfile.cc @@ -83,3 +83,34 @@ bool readShortFile(const std::string &fileName, std::string &contents) { return true; } + +bool writeShortFile(const std::string &fileName, std::string &contents, + int flags) { + int fd = open(fileName.c_str(), O_WRONLY | flags, 0600); + if (fd < 0) { + return false; + } + + auto ptr = &contents[0]; + ssize_t nwrite; + auto nleft = contents.size(); + + while (nleft > 0) { + REISSUE_WRITE: + nwrite = write(fd, ptr, nleft); + if (nwrite < 0) { + /* error happened, ignore if EINTR, otherwise inform the caller */ + if (errno == EINTR) { + goto REISSUE_WRITE; + } + close(fd); + return false; + } + + nleft -= nwrite; + ptr += nwrite; + } + + close(fd); + return true; +} diff --git a/src/shortfile.hh b/src/shortfile.hh index 34af1e8..5c5f81d 100644 --- a/src/shortfile.hh +++ b/src/shortfile.hh @@ -21,3 +21,6 @@ #include bool readShortFile(const std::string &fileName, std::string &contents); + +bool writeShortFile(const std::string &fileName, std::string &contents, + int flags); diff --git a/src/stl_string_utils.cc b/src/stl_string_utils.cc index 4623b6e..9c3e3dc 100644 --- a/src/stl_string_utils.cc +++ b/src/stl_string_utils.cc @@ -146,3 +146,40 @@ int formatstr_cat(std::string &s, const char 
*format, ...) { va_end(args); return r; } + +std::string urlquote(const std::string input) { + std::string output; + output.reserve(3 * input.size()); + for (char val : input) { + if ((val >= 48 && val <= 57) || // Digits 0-9 + (val >= 65 && val <= 90) || // Uppercase A-Z + (val >= 97 && val <= 122) || // Lowercase a-z + (val == 95 || val == 46 || val == 45 || val == 126 || + val == 47)) // '_.-~/' + { + output += val; + } else { + const char hexDigits[] = "0123456789ABCDEF"; auto byte = static_cast<unsigned char>(val); output += '%'; output += hexDigits[byte >> 4]; output += hexDigits[byte & 0xF]; + } + } + return output; +} + +void trimslashes(std::string &path) { + if (path.empty()) { + return; + } + size_t begin = 0; + while (begin < path.length() && (path[begin] == '/')) { + ++begin; + } + + auto end = path.length() - 1; + while (end >= 0 && end >= begin && (path[end] == '/')) { + --end; + } + + if (begin != 0 || end != (path.length()) - 1) { + path = path.substr(begin, (end - begin) + 1); + } +} diff --git a/src/stl_string_utils.hh b/src/stl_string_utils.hh index 9222f52..d119e4e 100644 --- a/src/stl_string_utils.hh +++ b/src/stl_string_utils.hh @@ -37,3 +37,19 @@ int formatstr(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); int formatstr_cat(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); + +// Given an input string, quote it to a form that is safe +for embedding in a URL query parameter.
+// +// Letters, digits, and the characters '_.-~/' are never +// quoted; otherwise, the byte is represented with its percent-encoded +// ASCII representation (e.g., ' ' becomes %20) +std::string urlquote(const std::string input); + +// Trim the slash(es) from a given object name +// +// foo/bar/ -> foo/bar +// bar/baz -> bar/baz +// foo/bar/// -> foo/bar +// /a/b -> a/b +void trimslashes(std::string &path); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 438758c..f06ebe7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,11 +2,12 @@ add_executable( s3-gtest s3_tests.cc ../src/AWSv4-impl.cc ../src/logging.cc ../src/S3AccessInfo.cc + ../src/S3Directory.cc ../src/S3File.cc ../src/S3FileSystem.cc ../src/shortfile.cc ../src/stl_string_utils.cc - ../src/HTTPCommands.cc + ../src/HTTPCommands.cc ../src/S3Commands.cc ) @@ -20,15 +21,12 @@ add_executable( http-gtest http_tests.cc ) -if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) - add_dependencies(s3-gtest gtest) - add_dependencies(http-gtest gtest) - include_directories("${PROJECT_SOURCE_DIR}/vendor/gtest/googletest/include") -endif() - if(XROOTD_PLUGINS_EXTERNAL_GTEST) set(LIBGTEST "gtest") else() + add_dependencies(s3-gtest gtest) + add_dependencies(http-gtest gtest) + include_directories("${PROJECT_SOURCE_DIR}/vendor/gtest/googletest/include") set(LIBGTEST "${CMAKE_BINARY_DIR}/external/gtest/src/gtest-build/lib/libgtest.a") endif() diff --git a/test/s3_tests.cc b/test/s3_tests.cc index 8b9fdc6..0711c3b 100644 --- a/test/s3_tests.cc +++ b/test/s3_tests.cc @@ -17,7 +17,10 @@ ***************************************************************/ #include "../src/S3Commands.hh" +#include "../src/S3FileSystem.hh" +#include "../src/shortfile.hh" +#include #include #include #include @@ -62,6 +65,220 @@ TEST(TestS3URLGeneration, Test1) { ASSERT_EQ(generatedHostUrl, "https://s3-service.com:443/test-object"); } +class FileSystemFixtureBase : public testing::Test { + protected: + FileSystemFixtureBase() + : m_log(new 
XrdSysLogger(2, 0)) // Log to stderr, no log rotation + {} + + void SetUp() override { + setenv("XRDINSTANCE", "xrootd", 1); + char tmp_configfn[] = "/tmp/xrootd-s3-gtest.cfg.XXXXXX"; + auto result = mkstemp(tmp_configfn); + ASSERT_NE(result, -1) << "Failed to create temp file (" + << strerror(errno) << ", errno=" << errno << ")"; + m_configfn = std::string(tmp_configfn); + + auto contents = GetConfig(); + ASSERT_FALSE(contents.empty()); + ASSERT_TRUE(writeShortFile(m_configfn, contents, 0)) + << "Failed to write to temp file (" << strerror(errno) + << ", errno=" << errno << ")"; + } + + void TearDown() override { + if (!m_configfn.empty()) { + auto rv = unlink(m_configfn.c_str()); + ASSERT_EQ(rv, 0) << "Failed to delete temp file (" + << strerror(errno) << ", errno=" << errno << ")"; + } + } + + virtual std::string GetConfig() = 0; + + std::string m_configfn; + std::unique_ptr m_log; +}; + +class FileSystemS3VirtualBucket : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.bucket_name genome-browser +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style virtual +s3.end +)"; + } +}; + +class FileSystemS3VirtualNoBucket : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style virtual +s3.end +)"; + } +}; + +class FileSystemS3PathBucket : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.bucket_name genome-browser +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style path +s3.end +)"; + } +}; + +class FileSystemS3PathNoBucket : public FileSystemFixtureBase { + protected: + 
virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style path +s3.end +)"; + } +}; + +void TestDirectoryContents(S3FileSystem &fs, const std::string &dirname) { + std::unique_ptr dir(fs.newDir()); + ASSERT_TRUE(dir); + + XrdOucEnv env; + auto rv = dir->Opendir(dirname.c_str(), env); + ASSERT_EQ(rv, 0); + + struct stat buf; + ASSERT_EQ(dir->StatRet(&buf), 0); + + std::vector name; + name.resize(255); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "cellbrowser.json.bak"); + ASSERT_EQ(buf.st_mode & S_IFREG, + static_cast(S_IFREG)); + ASSERT_EQ(buf.st_size, 672); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "dataset.json"); + ASSERT_EQ(buf.st_mode & S_IFREG, + static_cast(S_IFREG)); + ASSERT_EQ(buf.st_size, 1847); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "desc.json"); + ASSERT_EQ(buf.st_mode & S_IFREG, + static_cast(S_IFREG)); + ASSERT_EQ(buf.st_size, 1091); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "all"); + ASSERT_EQ(buf.st_mode & S_IFDIR, + static_cast(S_IFDIR)); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "by-organ"); + ASSERT_EQ(buf.st_mode & S_IFDIR, + static_cast(S_IFDIR)); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "func-compart"); + ASSERT_EQ(buf.st_mode & S_IFDIR, + static_cast(S_IFDIR)); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), ""); + + ASSERT_EQ(dir->Close(), 0); +} + +TEST_F(FileSystemS3VirtualBucket, Create) { + EXPECT_NO_THROW( + { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); }); +} + +TEST_F(FileSystemS3VirtualBucket, Stat) { + S3FileSystem 
fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3VirtualBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); +} + +TEST_F(FileSystemS3VirtualNoBucket, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat( + "/test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak", + &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3VirtualNoBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens"); +} + +TEST_F(FileSystemS3PathBucket, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3PathBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); +} + +TEST_F(FileSystemS3PathNoBucket, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat( + "/test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak", + &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3PathNoBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens/"); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/vendor/tinyxml2 b/vendor/tinyxml2 new file mode 160000 index 
0000000..312a809 --- /dev/null +++ b/vendor/tinyxml2 @@ -0,0 +1 @@ +Subproject commit 312a8092245df393db14a0b2427457ed2ba75e1b