From 9403474b220fb6fe5663a3a7115baa6963fab7fc Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Wed, 22 May 2024 13:41:55 -0500 Subject: [PATCH 01/14] Add tinyxml2 as a dependency --- .gitmodules | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitmodules b/.gitmodules index 76e2bbf..0c2d89b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ [submodule "vendor/gtest"] path = vendor/gtest url = https://github.com/google/googletest.git + +[submodule "vendor/tinyxml2"] + path = vendor/tinyxml2 + url = https://github.com/leethomason/tinyxml2.git From bf9ba17872fec7c05807889e322841d52d2bac24 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Wed, 22 May 2024 22:40:59 -0500 Subject: [PATCH 02/14] Implement directory listings for S3 class Adds a new request type, list, and interprets the results as a directory listing. --- CMakeLists.txt | 11 +- src/HTTPCommands.cc | 16 +-- src/S3AccessInfo.cc | 6 ++ src/S3AccessInfo.hh | 12 ++- src/S3Commands.cc | 124 +++++++++++++++++++---- src/S3Commands.hh | 43 +++++++- src/S3Directory.cc | 216 ++++++++++++++++++++++++++++++++++++++++ src/S3Directory.hh | 40 +++++--- src/S3File.cc | 85 +++------------- src/S3File.hh | 6 +- src/S3FileSystem.cc | 136 ++++++++++++++++++++++--- src/S3FileSystem.hh | 12 +++ src/stl_string_utils.cc | 37 +++++++ src/stl_string_utils.hh | 16 +++ test/CMakeLists.txt | 1 + vendor/tinyxml2 | 1 + 16 files changed, 620 insertions(+), 142 deletions(-) create mode 100644 src/S3Directory.cc create mode 160000 vendor/tinyxml2 diff --git a/CMakeLists.txt b/CMakeLists.txt index b29f2e9..43ff635 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,12 +47,19 @@ if(NOT APPLE) SET( CMAKE_MODULE_LINKER_FLAGS "-Wl,--no-undefined") endif() +if( NOT XROOTD_EXTERNAL_TINYXML2 ) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_subdirectory(vendor/tinyxml2) +else() + find_package(tinyxml2::tinyxml2) +endif() + include_directories(${XROOTD_INCLUDES} ${CURL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS}) 
-add_library(XrdS3 SHARED src/S3File.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdS3 SHARED src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) add_library(XrdHTTPServer SHARED src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) -target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES}) +target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES} tinyxml2::tinyxml2) target_link_libraries(XrdHTTPServer -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES}) # The CMake documentation strongly advises against using these macros; instead, the pkg_check_modules diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index 2b3fa51..7b2073e 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -168,6 +168,8 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, const std::string &uri, const std::string &payload) { + m_log.Log(XrdHTTPServer::Debug, "SendRequest", "Sending HTTP request", + uri.c_str()); CURLcode rv = curl_global_init(CURL_GLOBAL_ALL); if (rv != 0) { this->errorCode = "E_CURL_LIB"; @@ -317,20 +319,6 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, CAFile = x509_ca_file; } - if (CAPath.empty()) { - char *soap_ssl_ca_dir = getenv("GAHP_SSL_CADIR"); - if (soap_ssl_ca_dir != NULL) { - CAPath = soap_ssl_ca_dir; - } - } - - if (CAFile.empty()) { - char *soap_ssl_ca_file = getenv("GAHP_SSL_CAFILE"); - if (soap_ssl_ca_file != NULL) { - CAFile = soap_ssl_ca_file; - } - } - if (!CAPath.empty()) { SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_CAPATH, CAPath.c_str()); 
} diff --git a/src/S3AccessInfo.cc b/src/S3AccessInfo.cc index 8664833..d0351a1 100644 --- a/src/S3AccessInfo.cc +++ b/src/S3AccessInfo.cc @@ -49,3 +49,9 @@ const std::string &S3AccessInfo::getS3SecretKeyFile() const { void S3AccessInfo::setS3SecretKeyFile(const std::string &s3SecretKeyFile) { s3_secret_key_file = s3SecretKeyFile; } + +const std::string &S3AccessInfo::getS3UrlStyle() const { return s3_url_style; } + +void S3AccessInfo::setS3UrlStyle(const std::string &s3UrlStyle) { + s3_url_style = s3UrlStyle; +} diff --git a/src/S3AccessInfo.hh b/src/S3AccessInfo.hh index 2770ee0..b45e112 100644 --- a/src/S3AccessInfo.hh +++ b/src/S3AccessInfo.hh @@ -2,8 +2,7 @@ // Created by Rich Wellner on 2/29/24. // -#ifndef XROOTD_S3_HTTP_S3ACCESSINFO_HH -#define XROOTD_S3_HTTP_S3ACCESSINFO_HH +#pragma once #include @@ -33,6 +32,12 @@ class S3AccessInfo { void setS3SecretKeyFile(const std::string &s3SecretKeyFile); + const std::string &getS3UrlStyle() const; + + void setS3UrlStyle(const std::string &s3UrlStyle); + + const int getS3SignatureVersion() const {return 4;} + private: std::string s3_bucket_name; std::string s3_service_name; @@ -40,6 +45,5 @@ class S3AccessInfo { std::string s3_service_url; std::string s3_access_key_file; std::string s3_secret_key_file; + std::string s3_url_style; }; - -#endif // XROOTD_S3_HTTP_S3ACCESSINFO_HH diff --git a/src/S3Commands.cc b/src/S3Commands.cc index dc12889..04e1e83 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -44,8 +45,6 @@ bool AmazonRequest::SendRequest() { default: this->errorCode = "E_INTERNAL"; this->errorMessage = "Invalid signature version."; - // dprintf( D_ALWAYS, "Invalid signature version (%d), failing.\n", - // signatureVersion ); return false; } } @@ -57,7 +56,8 @@ std::string AmazonRequest::canonicalizeQueryString() { // Takes in the configured `s3.service_url` and uses the bucket/object requested // to generate the host URL, as well 
as the canonical URI (which is the path to // the object). -bool AmazonRequest::parseURL(const std::string &url, std::string &path) { +bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, + std::string &path) { auto i = url.find("://"); if (i == std::string::npos) { return false; @@ -76,8 +76,10 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &path) { // for exporting many buckets from a single endpoint. if (bucket.empty()) { path = "/" + object; + bucket_path = "/" + object.substr(0, object.find('/')); } else { path = "/" + bucket + "/" + object; + bucket_path = bucket; } } else { // In virtual-style requests, the host should be determined as @@ -85,6 +87,7 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &path) { // :// up until the last /, but with appended to the front. host = bucket + "." + substring(url, i + 3); path = "/" + object; + bucket_path = "/"; } return true; @@ -136,6 +139,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload, } trim(saKey); } else { + canonicalQueryString = canonicalizeQueryString(); + requiresSignature = false; // If we don't create a signature, it must not be needed... return true; // If there was no saKey, we need not generate a signature @@ -176,14 +181,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload, canonicalURI = pathEncode(canonicalURI); // The canonical query string is the alphabetically sorted list of - // URI-encoded parameter names '=' values, separated by '&'s. That - // wouldn't be hard to do, but we don't need to, since we send - // everything in the POST body, instead. - std::string canonicalQueryString; - - // This function doesn't (currently) support query parameters, - // but no current caller attempts to use them. - assert((httpVerb != "GET") || query_parameters.size() == 0); + // URI-encoded parameter names '=' values, separated by '&'s. 
+ canonicalQueryString = canonicalizeQueryString(); // The canonical headers must include the Host header, so add that // now if we don't have it. @@ -209,7 +208,6 @@ bool AmazonRequest::createV4Signature(const std::string &payload, if (!doSha256(payload, messageDigest, &mdLength)) { this->errorCode = "E_INTERNAL"; this->errorMessage = "Unable to hash payload."; - // dprintf( D_ALWAYS, "Unable to hash payload, failing.\n" ); return false; } std::string payloadHash; @@ -396,10 +394,6 @@ bool AmazonRequest::sendV4Request(const std::string &payload, return false; } - if (!sendContentSHA) { - // dprintf( D_FULLDEBUG, "Payload is '%s'\n", payload.c_str() ); - } - std::string authorizationValue; if (!createV4Signature(payload, authorizationValue, sendContentSHA)) { if (this->errorCode.empty()) { @@ -417,7 +411,12 @@ bool AmazonRequest::sendV4Request(const std::string &payload, headers["Authorization"] = authorizationValue; } - return sendPreparedRequest(protocol, hostUrl, payload); + // This operation is on the bucket itself; alter the URL + auto url = hostUrl; + if (!canonicalQueryString.empty()) { + url += "?" 
+ canonicalQueryString; + } + return sendPreparedRequest(protocol, url, payload); } // It's stated in the API documentation that you can upload to any region @@ -484,3 +483,94 @@ bool AmazonS3Head::SendRequest() { } // --------------------------------------------------------------------------- + +bool AmazonS3List::SendRequest(const std::string &continuationToken, + size_t max_keys) { + query_parameters["list-type"] = "2"; + query_parameters["delimiter"] = "/"; + query_parameters["prefix"] = urlquote(object); + if (!continuationToken.empty()) { + query_parameters["continuation-token"] = urlquote(continuationToken); + } + query_parameters["max-keys"] = std::to_string(max_keys); + httpVerb = "GET"; + + // Operation is on the bucket itself; alter the URL to remove the object + hostUrl = protocol + "://" + host + bucketPath; + + return SendS3Request(""); +} + +bool AmazonS3List::Results(std::vector &objInfo, + std::vector &commonPrefixes, + std::string &ct, std::string &errMsg) { + tinyxml2::XMLDocument doc; + auto err = doc.Parse(resultString.c_str()); + if (err != tinyxml2::XML_SUCCESS) { + errMsg = doc.ErrorStr(); + return false; + } + + auto elem = doc.RootElement(); + if (strcmp(elem->Name(), "ListBucketResult")) { + errMsg = "S3 ListBucket response is not rooted with ListBucketResult " + "element"; + return false; + } + + bool isTruncated = false; + for (auto child = elem->FirstChildElement(); child != nullptr; + child = child->NextSiblingElement()) { + if (!strcmp(child->Name(), "IsTruncated")) { + bool isTrunc; + if (child->QueryBoolText(&isTrunc) == tinyxml2::XML_SUCCESS) { + isTruncated = true; + } + } else if (!strcmp(child->Name(), "CommonPrefixes")) { + auto prefix = child->FirstChildElement("Prefix"); + if (prefix != nullptr) { + auto prefixChar = prefix->GetText(); + if (prefixChar != nullptr) { + auto prefixStr = std::string(prefixChar); + trim(prefixStr); + if (!prefixStr.empty()) { + commonPrefixes.emplace_back(prefixStr); + } + } + } + } else if 
(!strcmp(child->Name(), "Contents")) { + std::string keyStr; + int64_t size; + bool goodSize = false; + auto key = child->FirstChildElement("Key"); + if (key != nullptr) { + auto keyChar = key->GetText(); + if (keyChar != nullptr) { + keyStr = std::string(keyChar); + trim(keyStr); + } + } + auto sizeElem = child->FirstChildElement("Size"); + if (sizeElem != nullptr) { + goodSize = + (sizeElem->QueryInt64Text(&size) == tinyxml2::XML_SUCCESS); + } + if (goodSize && !keyStr.empty()) { + S3ObjectInfo obj; + obj.m_key = keyStr; + obj.m_size = size; + objInfo.emplace_back(obj); + } + } else if (!strcmp(child->Name(), "NextContinuationToken")) { + auto ctChar = child->GetText(); + if (ctChar) { + ct = ctChar; + trim(ct); + } + } + } + if (!isTruncated) { + ct = ""; + } + return true; +} diff --git a/src/S3Commands.hh b/src/S3Commands.hh index dee72ee..542ed54 100644 --- a/src/S3Commands.hh +++ b/src/S3Commands.hh @@ -18,12 +18,20 @@ #pragma once +#include "S3AccessInfo.hh" #include "HTTPCommands.hh" #include +#include class AmazonRequest : public HTTPRequest { public: + AmazonRequest(const S3AccessInfo &ai, const std::string objectName, XrdSysError &log) + : AmazonRequest(ai.getS3ServiceUrl(), ai.getS3AccessKeyFile(), + ai.getS3SecretKeyFile(), ai.getS3BucketName(), + objectName, ai.getS3UrlStyle(), ai.getS3SignatureVersion(), log) + {} + AmazonRequest(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, int sv, @@ -37,7 +45,7 @@ class AmazonRequest : public HTTPRequest { // "https://my-url.com:443", the bucket is "my-bucket", and the object // is "my-object", then the host will be "my-bucket.my-url.com:443" and // the canonicalURI will be "/my-object". 
- if (!parseURL(hostUrl, canonicalURI)) { + if (!parseURL(hostUrl, bucketPath, canonicalURI)) { errorCode = "E_INVALID_SERVICE_URL"; errorMessage = "Failed to parse host and canonicalURI from service URL."; @@ -67,7 +75,7 @@ class AmazonRequest : public HTTPRequest { virtual const std::string *getAccessKey() const { return &accessKeyFile; } virtual const std::string *getSecretKey() const { return &secretKeyFile; } - bool parseURL(const std::string &url, std::string &path); + bool parseURL(const std::string &url, std::string &bucket_path, std::string &path); virtual bool SendRequest(); virtual bool SendS3Request(const std::string &payload); @@ -82,6 +90,8 @@ class AmazonRequest : public HTTPRequest { std::string host; std::string canonicalURI; + std::string bucketPath; // Path to use for bucket-level operations (such as listings). May be empty for DNS-style buckets + std::string canonicalQueryString; std::string bucket; std::string object; @@ -103,6 +113,10 @@ class AmazonS3Upload : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + AmazonS3Upload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -122,6 +136,10 @@ class AmazonS3Download : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + AmazonS3Download(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -137,6 +155,10 @@ class AmazonS3Head : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + 
AmazonS3Head(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -147,3 +169,20 @@ class AmazonS3Head : public AmazonRequest { virtual bool SendRequest(); }; + +struct S3ObjectInfo { + size_t m_size; + std::string m_key; +}; + +class AmazonS3List : public AmazonRequest { + public: + AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + + virtual ~AmazonS3List() {} + + bool SendRequest(const std::string &continuationToken, size_t max_keys=1000); + bool Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg); +}; diff --git a/src/S3Directory.cc b/src/S3Directory.cc new file mode 100644 index 0000000..a148b73 --- /dev/null +++ b/src/S3Directory.cc @@ -0,0 +1,216 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************/ + +#include "S3Directory.hh" +#include "S3Commands.hh" +#include "logging.hh" +#include "stl_string_utils.hh" + +#include + +#include + +void S3Directory::Reset() { + m_opened = false; + m_ct = ""; + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + m_stat_buf = nullptr; + m_ai = S3AccessInfo(); + m_object = ""; +} + +int S3Directory::ListS3Dir(const std::string &ct) { + AmazonS3List listCommand(m_ai, m_object, m_log); + auto res = listCommand.SendRequest(ct); + if (!res) { + switch (listCommand.getResponseCode()) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } + } + std::string errMsg; + m_idx = 0; + res = listCommand.Results(m_objInfo, m_commonPrefixes, m_ct, errMsg); + if (!res) { + m_log.Log(XrdHTTPServer::Warning, "Opendir", + "Failed to parse S3 results:", errMsg.c_str()); + return -EIO; + } + m_opened = true; + return 0; +} + +int S3Directory::Opendir(const char *path, XrdOucEnv &env) { + if (m_opened) { + return -EBADF; + } + Reset(); + + std::string exposedPath, object; + int rv = m_fs.parsePath(path, exposedPath, object); + if (rv != 0) { + return rv; + } + + auto ai = m_fs.getS3AccessInfo(exposedPath); + if (!ai) { + return -ENOENT; + } + m_ai = *ai; + m_object = object; + + return ListS3Dir(""); +} + +int S3Directory::Readdir(char *buff, int blen) { + if (!m_opened) { + return -EBADF; + } + + memset(m_stat_buf, '\0', sizeof(struct stat)); + + // m_idx encodes the location inside the current directory. + // - m_idx in [0, m_objInfo.size) means return a "file" from the object + // list. + // - m_idx == m_objectInfo.size means return the first entry in the + // directory/common prefix list. + // - m_idx in (m_commonPrefixes.size, -1] means return an entry from the + // common prefix list. + // - m_idx == -m_commonPrefixes.size means that all the path elements have + // been consumed. 
+ // + // If all the paths entry have been consumed, then if the continuation token + // is set, list more objects in the bucket. If it's unset, then we've + // iterated through all the bucket contents. + auto idx = m_idx; + if (m_objInfo.empty() && m_commonPrefixes.empty()) { + *buff = '\0'; + return XrdOssOK; + } else if (idx >= 0 && idx < static_cast(m_objInfo.size())) { + m_idx++; + std::string full_name = m_objInfo[idx].m_key; + full_name.erase(0, full_name.rfind("/")); + trimslashes(full_name); + strncpy(buff, full_name.c_str(), blen); + if (buff[blen - 1] != '\0') { + buff[blen - 1] = '\0'; + return -ENOMEM; + } + if (m_stat_buf) { + m_stat_buf->st_mode = 0x0600 | S_IFREG; + m_stat_buf->st_nlink = 1; + m_stat_buf->st_size = m_objInfo[idx].m_size; + } + } else if (idx < 0 && + -idx == static_cast(m_commonPrefixes.size())) { + if (!m_ct.empty()) { + // Get the next set of results from S3. + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + memset(&m_stat_buf, '\0', sizeof(struct stat)); + auto rv = ListS3Dir(m_ct); + if (rv != 0) { + m_opened = false; + return rv; + } + // Recurse to parse the fresh results. + return Readdir(buff, blen); + } + *buff = '\0'; + return XrdOssOK; + } else if (idx == static_cast(m_objInfo.size()) || + -idx < static_cast(m_commonPrefixes.size())) { + if (m_commonPrefixes.empty()) { + if (!m_ct.empty()) { + // Get the next set of results from S3. + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + memset(&m_stat_buf, '\0', sizeof(struct stat)); + auto rv = ListS3Dir(m_ct); + if (rv != 0) { + m_opened = false; + return rv; + } + // Recurse to parse the fresh results. 
+ return Readdir(buff, blen); + } + *buff = '\0'; + return XrdOssOK; + } + if (idx == static_cast(m_objInfo.size())) { + m_idx = -1; + idx = 0; + } else { + idx = -m_idx; + m_idx--; + } + std::string full_name = m_commonPrefixes[idx]; + trimslashes(full_name); + full_name.erase(0, full_name.rfind("/")); + trimslashes(full_name); + strncpy(buff, full_name.c_str(), blen); + if (buff[blen - 1] != '\0') { + buff[blen - 1] = '\0'; + return -ENOMEM; + } + if (m_stat_buf) { + m_stat_buf->st_mode = 0x0700 | S_IFDIR; + m_stat_buf->st_nlink = 0; + m_stat_buf->st_size = 4096; + } + } else { + return -EBADF; + } + + if (m_stat_buf) { + m_stat_buf->st_uid = 1; + m_stat_buf->st_gid = 1; + m_stat_buf->st_mtime = m_stat_buf->st_ctime = m_stat_buf->st_atime = 0; + m_stat_buf->st_dev = 0; + m_stat_buf->st_ino = 1; // If both st_dev and st_ino are 0, then XRootD + // interprets that as an unavailable file. + } + return XrdOssOK; +} + +int S3Directory::StatRet(struct stat *buf) { + if (!m_opened) { + return -EBADF; + } + + m_stat_buf = buf; + return XrdOssOK; +} + +int S3Directory::Close(long long *retsz) { + if (!m_opened) { + return -EBADF; + } + Reset(); + return XrdOssOK; +} diff --git a/src/S3Directory.hh b/src/S3Directory.hh index 3287eda..910b288 100644 --- a/src/S3Directory.hh +++ b/src/S3Directory.hh @@ -19,27 +19,43 @@ #pragma once #include "HTTPDirectory.hh" +#include "S3Commands.hh" +#include "S3FileSystem.hh" -// Leaving in duplicate definitions for now. It remains -// to be seen if we'll need to change these and have specific -// behaviors for either HTTP or S3 variants in the future. 
+#include +#include + +class XrdSysError; class S3Directory : public HTTPDirectory { public: - S3Directory(XrdSysError &log) - : HTTPDirectory(log) - // m_log(log) + S3Directory(XrdSysError &log, const S3FileSystem &fs) + : HTTPDirectory(log), + m_fs(fs) {} virtual ~S3Directory() {} - virtual int Opendir(const char *path, XrdOucEnv &env) override { - return -ENOSYS; - } + virtual int Opendir(const char *path, XrdOucEnv &env) override; + + int Readdir(char *buff, int blen) override; + + int StatRet(struct stat *statStruct) override; - int Readdir(char *buff, int blen) override { return -ENOSYS; } + int Close(long long *retsz = 0) override; - int StatRet(struct stat *statStruct) override { return -ENOSYS; } + private: + void Reset(); + int ListS3Dir(const std::string &ct); - int Close(long long *retsz = 0) override { return -ENOSYS; } + bool m_opened{false}; + ssize_t m_idx{0}; + std::vector m_objInfo; + std::vector m_commonPrefixes; + std::string m_prefix; + std::string m_ct; + std::string m_object; + const S3FileSystem &m_fs; + S3AccessInfo m_ai; + struct stat *m_stat_buf{nullptr}; }; diff --git a/src/S3File.cc b/src/S3File.cc index 6b7ffce..13a8060 100644 --- a/src/S3File.cc +++ b/src/S3File.cc @@ -48,81 +48,26 @@ XrdVERSIONINFO(XrdOssGetFileSystem, S3); S3File::S3File(XrdSysError &log, S3FileSystem *oss) : m_log(log), m_oss(oss), content_length(0), last_modified(0) {} -int parse_path(const S3FileSystem &fs, const char *fullPath, - std::string &exposedPath, std::string &object) { - // - // Check the path for validity. - // - std::filesystem::path p(fullPath); - auto pathComponents = p.begin(); - - // Iterate through components of the fullPath until we either find a match - // or we've reached the end of the path. 
- std::filesystem::path currentPath = *pathComponents; - while (pathComponents != p.end()) { - if (fs.exposedPathExists(currentPath.string())) { - exposedPath = currentPath.string(); - break; - } - ++pathComponents; - if (pathComponents != p.end()) { - currentPath /= *pathComponents; - } else { - return -ENOENT; - } - } - - // Objects names may contain path separators. - ++pathComponents; - if (pathComponents == p.end()) { - return -ENOENT; - } - - std::filesystem::path objectPath = *pathComponents++; - for (; pathComponents != p.end(); ++pathComponents) { - objectPath /= (*pathComponents); +int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + m_log.Log(LogMask::Warning, "S3File::Open", "Opening file", path); } - object = objectPath.string(); - - fprintf(stderr, "object = %s\n", object.c_str()); - - return 0; -} -int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { std::string exposedPath, object; - int rv = parse_path(*m_oss, path, exposedPath, object); + auto rv = m_oss->parsePath(path, exposedPath, object); if (rv != 0) { return rv; } - if (!m_oss->exposedPathExists(exposedPath)) + auto ai = m_oss->getS3AccessInfo(exposedPath); + if (!ai) { return -ENOENT; - - std::string configured_s3_region = m_oss->getS3Region(exposedPath); - std::string configured_s3_service_url = m_oss->getS3ServiceURL(exposedPath); - std::string configured_s3_access_key = - m_oss->getS3AccessKeyFile(exposedPath); - std::string configured_s3_secret_key = - m_oss->getS3SecretKeyFile(exposedPath); - std::string configured_s3_bucket_name = m_oss->getS3BucketName(exposedPath); - std::string configured_s3_url_style = m_oss->getS3URLStyle(); - - // We used to query S3 here to see if the object existed, but of course - // if you're creating a file on upload, you don't care. 
- - this->s3_object_name = object; - this->s3_bucket_name = configured_s3_bucket_name; - this->s3_service_url = configured_s3_service_url; - this->s3_access_key = configured_s3_access_key; - this->s3_secret_key = configured_s3_secret_key; - this->s3_url_style = configured_s3_url_style; + } + m_ai = *ai; // This flag is not set when it's going to be a read operation // so we check if the file exists in order to be able to return a 404 if (!Oflag) { - AmazonS3Head head(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Head head(m_ai, s3_object_name, m_log); if (!head.SendRequest()) { return -ENOENT; @@ -133,9 +78,7 @@ int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { } ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { - AmazonS3Download download(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Download download(m_ai, s3_object_name, m_log); if (!download.SendRequest(offset, size)) { std::stringstream ss; @@ -151,9 +94,7 @@ ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { } int S3File::Fstat(struct stat *buff) { - AmazonS3Head head(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Head head(m_ai, s3_object_name, m_log); if (!head.SendRequest()) { // SendRequest() returns false for all errors, including ones @@ -218,9 +159,7 @@ int S3File::Fstat(struct stat *buff) { } ssize_t S3File::Write(const void *buffer, off_t offset, size_t size) { - AmazonS3Upload upload(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Upload upload(m_ai, s3_object_name, m_log); std::string payload((char *)buffer, size); if 
(!upload.SendRequest(payload, offset, size)) { diff --git a/src/S3File.hh b/src/S3File.hh index 33a3244..b930c3a 100644 --- a/src/S3File.hh +++ b/src/S3File.hh @@ -98,12 +98,8 @@ class S3File : public XrdOssDF { XrdSysError &m_log; S3FileSystem *m_oss; - std::string s3_service_url; - std::string s3_bucket_name; std::string s3_object_name; - std::string s3_access_key; - std::string s3_secret_key; - std::string s3_url_style; + S3AccessInfo m_ai; size_t content_length; time_t last_modified; diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 3a4e939..10d510b 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -20,6 +20,7 @@ #include "S3AccessInfo.hh" #include "S3Directory.hh" #include "S3File.hh" +#include "logging.hh" #include "stl_string_utils.hh" #include @@ -27,7 +28,9 @@ #include #include +#include #include +#include #include #include @@ -180,7 +183,7 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { // Object Allocation Functions // XrdOssDF *S3FileSystem::newDir(const char *user) { - return new S3Directory(m_log); + return new S3Directory(m_log, *this); } XrdOssDF *S3FileSystem::newFile(const char *user) { @@ -189,23 +192,91 @@ XrdOssDF *S3FileSystem::newFile(const char *user) { int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { - std::string error; + m_log.Log(XrdHTTPServer::Debug, "Stat", "Stat'ing path", path); - m_log.Emsg("Stat", "Stat'ing path", path); + std::string exposedPath, object; + auto rv = parsePath(path, exposedPath, object); + if (rv != 0) { + return rv; + } + auto ai = getS3AccessInfo(exposedPath); + if (!ai) { + return -ENOENT; + } - S3File s3file(m_log, this); - int rv = s3file.Open(path, 0, (mode_t)0, *env); - if (rv) { - m_log.Emsg("Stat", "Failed to open path:", path); + trimslashes(object); + AmazonS3List listCommand(*ai, object, m_log); + auto res = listCommand.SendRequest(""); + if (!res) { + if (m_log.getMsgMask() & XrdHTTPServer::Info) { + std::stringstream 
ss; + ss << "Failed to stat path " << path << "; response code " + << listCommand.getResponseCode(); + m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); + } + switch (listCommand.getResponseCode()) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } } - // Assume that S3File::FStat() doesn't write to buff unless it succeeds. - rv = s3file.Fstat(buff); - if (rv != 0) { - formatstr(error, "File %s not found.", path); - m_log.Emsg("Stat", error.c_str()); + + std::string errMsg; + std::vector objInfo; + std::vector commonPrefixes; + std::string ct; + res = listCommand.Results(objInfo, commonPrefixes, ct, errMsg); + if (!res) { + m_log.Log(XrdHTTPServer::Warning, "Stat", + "Failed to parse S3 results:", errMsg.c_str()); + return -EIO; + } + + bool foundObj = false; + size_t objSize = 0; + for (const auto &obj : objInfo) { + if (obj.m_key == object) { + foundObj = true; + objSize = obj.m_size; + break; + } + } + if (foundObj) { + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; + buff->st_uid = buff->st_gid = 1; + buff->st_size = objSize; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; + return 0; + } + + auto desiredPrefix = object + "/"; + bool foundPrefix = false; + for (const auto &prefix : commonPrefixes) { + if (prefix == desiredPrefix) { + foundPrefix = true; + break; + } + } + if (!foundPrefix) { return -ENOENT; } + buff->st_mode = 0700 | S_IFDIR; + buff->st_nlink = 0; + buff->st_uid = 1; + buff->st_gid = 1; + buff->st_size = 4096; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; return 0; } @@ -213,7 +284,7 @@ int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts) { // Is path valid? 
std::string exposedPath, object; - int rv = parse_path(*this, path, exposedPath, object); + int rv = parsePath(path, exposedPath, object); if (rv != 0) { return rv; } @@ -227,3 +298,42 @@ int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, return 0; } + +int S3FileSystem::parsePath(const char *fullPath, std::string &exposedPath, + std::string &object) const { + // + // Check the path for validity. + // + std::filesystem::path p(fullPath); + auto pathComponents = p.begin(); + + // Iterate through components of the fullPath until we either find a match + // or we've reached the end of the path. + std::filesystem::path currentPath = *pathComponents; + while (pathComponents != p.end()) { + if (exposedPathExists(currentPath.string())) { + exposedPath = currentPath.string(); + break; + } + ++pathComponents; + if (pathComponents != p.end()) { + currentPath /= *pathComponents; + } else { + return -ENOENT; + } + } + + // Objects names may contain path separators. + ++pathComponents; + if (pathComponents == p.end()) { + return -ENOENT; + } + + std::filesystem::path objectPath = *pathComponents++; + for (; pathComponents != p.end(); ++pathComponents) { + objectPath /= (*pathComponents); + } + object = objectPath.string(); + + return 0; +} diff --git a/src/S3FileSystem.hh b/src/S3FileSystem.hh index 5cdee6a..41d70f3 100644 --- a/src/S3FileSystem.hh +++ b/src/S3FileSystem.hh @@ -101,6 +101,13 @@ class S3FileSystem : public XrdOss { return nullptr; } + // Given a path as seen by XRootD, split it into the configured prefix and the object + // within the prefix. + // + // The returned `exposedPath` can be later used with the `get*` functions to fetch + // the required S3 configuration. 
+ int parsePath(const char *fullPath, std::string &exposedPath, std::string &object) const; + bool exposedPathExists(const std::string &exposedPath) const { return s3_access_map.count(exposedPath) > 0; } @@ -126,6 +133,11 @@ class S3FileSystem : public XrdOss { } const std::string &getS3URLStyle() const { return s3_url_style; } + const S3AccessInfo * + getS3AccessInfo(const std::string &exposedPath) const { + return s3_access_map.at(exposedPath); + } + private: XrdOucEnv *m_env; XrdSysError m_log; diff --git a/src/stl_string_utils.cc b/src/stl_string_utils.cc index 4623b6e..9c3e3dc 100644 --- a/src/stl_string_utils.cc +++ b/src/stl_string_utils.cc @@ -146,3 +146,40 @@ int formatstr_cat(std::string &s, const char *format, ...) { va_end(args); return r; } + +std::string urlquote(const std::string input) { + std::string output; + output.reserve(3 * input.size()); + for (char val : input) { + if ((val >= 48 && val <= 57) || // Digits 0-9 + (val >= 65 && val <= 90) || // Uppercase A-Z + (val >= 97 && val <= 122) || // Lowercase a-z + (val == 95 || val == 46 || val == 45 || val == 126 || + val == 47)) // '_.-~/' + { + output += val; + } else { + output += '%'; output += "0123456789ABCDEF"[static_cast<unsigned char>(val) >> 4]; output += "0123456789ABCDEF"[static_cast<unsigned char>(val) & 0xF]; + } + } + return output; +} + +void trimslashes(std::string &path) { + if (path.empty()) { + return; + } + size_t begin = 0; + while (begin < path.length() && (path[begin] == '/')) { + ++begin; + } + + auto end = path.length() - 1; + while (end >= 0 && end >= begin && (path[end] == '/')) { + --end; + } + + if (begin != 0 || end != (path.length()) - 1) { + path = path.substr(begin, (end - begin) + 1); + } +} diff --git a/src/stl_string_utils.hh b/src/stl_string_utils.hh index 9222f52..d119e4e 100644 --- a/src/stl_string_utils.hh +++ b/src/stl_string_utils.hh @@ -37,3 +37,19 @@ int formatstr(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); int formatstr_cat(std::string &s, const char *format, ...)
CHECK_PRINTF_FORMAT(2, 3); + +// Given an input string, quote it to a form that is safe +// for embedding in a URL query parameter. +// +// Letters, digits, and the characters '_.-~/' are never +// quoted; otherwise, the byte is represented with its percent-encoded +// ASCII representation (e.g., ' ' becomes %20) +std::string urlquote(const std::string input); + +// Trim the slash(es) from a given object name +// +// foo/bar/ -> foo/bar +// bar/baz -> bar/baz +// foo/bar/// -> foo/bar +// /a/b -> a/b +void trimslashes(std::string &path); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 438758c..7f5cd1a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,7 @@ add_executable( s3-gtest s3_tests.cc ../src/AWSv4-impl.cc ../src/logging.cc ../src/S3AccessInfo.cc + ../src/S3Directory.cc ../src/S3File.cc ../src/S3FileSystem.cc ../src/shortfile.cc diff --git a/vendor/tinyxml2 b/vendor/tinyxml2 new file mode 160000 index 0000000..312a809 --- /dev/null +++ b/vendor/tinyxml2 @@ -0,0 +1 @@ +Subproject commit 312a8092245df393db14a0b2427457ed2ba75e1b From af32b90c788bae52fa4d4e38f80bb2411d9c56e1 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 14:39:21 -0500 Subject: [PATCH 03/14] Bugfixes and overhaul for the S3 directory listing Includes: - Simple refactoring to follow common C++ coding styles. - Handling of various edge cases when the object is missing or empty. - Fixing path-based URL mode. - Fix bucket name parsing from path when a specific bucket is not configured. - Fix segfaults when using continuation tokens. 
--- src/AWSv4-impl.cc | 4 +- src/HTTPCommands.hh | 4 +- src/S3Commands.cc | 88 +++++++++++++++++++++++++++++-------- src/S3Commands.hh | 15 ++++--- src/S3Directory.cc | 38 +++++++++++++--- src/S3File.cc | 15 ++++--- src/S3File.hh | 2 +- src/S3FileSystem.cc | 104 ++++++++++++++++++++++++++++++++++---------- src/S3FileSystem.hh | 22 +++++----- 9 files changed, 219 insertions(+), 73 deletions(-) diff --git a/src/AWSv4-impl.cc b/src/AWSv4-impl.cc index 26f306b..1306742 100644 --- a/src/AWSv4-impl.cc +++ b/src/AWSv4-impl.cc @@ -201,7 +201,9 @@ std::string canonicalizeQueryString( } // We'll always have a superflous trailing ampersand. - canonicalQueryString.erase(canonicalQueryString.end() - 1); + if (!canonicalQueryString.empty()) { + canonicalQueryString.erase(canonicalQueryString.end() - 1); + } return canonicalQueryString; } diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index aec5966..a3a0c69 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -47,6 +47,8 @@ class HTTPRequest { virtual bool SendHTTPRequest(const std::string &payload); unsigned long getResponseCode() const { return responseCode; } + const std::string & getErrorCode() const {return errorCode;} + const std::string & getErrorMessage() const {return errorMessage;} const std::string &getResultString() const { return resultString; } // Currently only used in PUTS, but potentially useful elsewhere @@ -74,7 +76,7 @@ class HTTPRequest { std::string errorCode; std::string resultString; - unsigned long responseCode; + unsigned long responseCode{0}; unsigned long expectedResponseCode = 200; bool includeResponseHeader; diff --git a/src/S3Commands.cc b/src/S3Commands.cc index 04e1e83..7398b2e 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -58,18 +58,22 @@ std::string AmazonRequest::canonicalizeQueryString() { // the object). 
bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, std::string &path) { - auto i = url.find("://"); - if (i == std::string::npos) { + auto schemeEndIdx = url.find("://"); + if (schemeEndIdx == std::string::npos) { return false; } + if (url.size() < schemeEndIdx + 3) { + return false; + } + auto hostStartIdx = schemeEndIdx + 3; - auto j = url.find("/", i + 3); - if (j == std::string::npos) { - if (style == "path") { + auto resourceStartIdx = url.find("/", hostStartIdx); + if (resourceStartIdx == std::string::npos) { + if (m_style == "path") { // If we're configured for path-style requests, then the host is // everything between // :// and the last / - host = substring(url, i + 3); + host = substring(url, hostStartIdx); // Likewise, the path is going to be /bucket/object // Sometimes we intentionally configure the plugin with no bucket because we // assume the incoming object request already encodes the bucket. This is used @@ -79,13 +83,13 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, bucket_path = "/" + object.substr(0, object.find('/')); } else { path = "/" + bucket + "/" + object; - bucket_path = bucket; + bucket_path = "/" + bucket; } } else { // In virtual-style requests, the host should be determined as // everything between // :// up until the last /, but with appended to the front. - host = bucket + "." + substring(url, i + 3); + host = bucket + "." + substring(url, hostStartIdx); path = "/" + object; bucket_path = "/"; } @@ -93,12 +97,12 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, return true; } - if (style == "path") { - host = substring(url, i + 3, j); - path = substring(url, j) + "/" + bucket + "/" + object; + if (m_style == "path") { + host = substring(url, hostStartIdx, resourceStartIdx); + path = substring(url, resourceStartIdx) + "/" + bucket + "/" + object; } else { - host = bucket + "." 
+ substring(url, i + 3, j); - path = substring(url, j) + object; + host = bucket + "." + substring(url, hostStartIdx, resourceStartIdx); + path = substring(url, resourceStartIdx) + object; } return true; @@ -484,15 +488,14 @@ bool AmazonS3Head::SendRequest() { // --------------------------------------------------------------------------- -bool AmazonS3List::SendRequest(const std::string &continuationToken, - size_t max_keys) { - query_parameters["list-type"] = "2"; +bool AmazonS3List::SendRequest(const std::string &continuationToken) { + query_parameters["list-type"] = "2"; // Version 2 of the object-listing API query_parameters["delimiter"] = "/"; query_parameters["prefix"] = urlquote(object); if (!continuationToken.empty()) { query_parameters["continuation-token"] = urlquote(continuationToken); } - query_parameters["max-keys"] = std::to_string(max_keys); + query_parameters["max-keys"] = std::to_string(m_maxKeys); httpVerb = "GET"; // Operation is on the bucket itself; alter the URL to remove the object @@ -501,6 +504,21 @@ bool AmazonS3List::SendRequest(const std::string &continuationToken, return SendS3Request(""); } +// Parse the results of the AWS directory listing +// +// S3 returns an XML structure for directory listings so we must pick it apart and +// convert it to `objInfo` and `commonPrefixes`. The `objInfo` is a list of objects +// that match the current prefix but don't have a subsequent `/` in the object name. +// The `commonPrefixes` are the unique prefixes of other objects that have the same +// prefix as the original query but also have an `/`. +// +// Example. Suppose we have the following objects in the bucket: +// - /foo/bar.txt +// - /foo/bar/example.txt +// - /foo/baz/example.txt +// Then, a query to list with prefix `/foo/` would return object info for `/foo/bar.txt` +// while the common prefixes would be `/foo/bar/` and `/foo/baz`. Note this is quite +// close to returning a list of files in a directory and a list of sub-directories. 
bool AmazonS3List::Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg) { @@ -518,13 +536,47 @@ bool AmazonS3List::Results(std::vector &objInfo, return false; } + // Example response from S3: + // + // + // genome-browser + // cells/muscle-ibm/endothelial-stromal-cells + // 40 + // 40 + // 1PnsptbFFpBSb6UBNN4F/RrxtBvIHjNpdXNYlX8E7IyqXRK26w2y36KViUAbyPPsjzikVY0Zj4jMvQHRhsGWZbcKKrEVvaR0HaZDtfUXUwnc= + // false + // + // cells/muscle-ibm/endothelial-stromal-cells/UMAP.coords.tsv.gz + // 2023-08-21T11:02:53.000Z + // "b9b0065f10cbd91c9d341acc235c63b0" + // 360012 + // STANDARD + // + // + // cells/muscle-ibm/endothelial-stromal-cells/barcodes.tsv.gz + // 2023-07-17T11:02:19.000Z + // "048feef5d340e2dd4d2d2d495c24ad7e" + // 118061 + // STANDARD + // + // ... (truncated some entries for readability) ... + // + // cells/muscle-ibm/endothelial-stromal-cells/coords/ + // + // + // cells/muscle-ibm/endothelial-stromal-cells/markers/ + // + // + // cells/muscle-ibm/endothelial-stromal-cells/metaFields/ + // + // bool isTruncated = false; for (auto child = elem->FirstChildElement(); child != nullptr; child = child->NextSiblingElement()) { if (!strcmp(child->Name(), "IsTruncated")) { bool isTrunc; if (child->QueryBoolText(&isTrunc) == tinyxml2::XML_SUCCESS) { - isTruncated = true; + isTruncated = isTrunc; } } else if (!strcmp(child->Name(), "CommonPrefixes")) { auto prefix = child->FirstChildElement("Prefix"); diff --git a/src/S3Commands.hh b/src/S3Commands.hh index 542ed54..10e7c72 100644 --- a/src/S3Commands.hh +++ b/src/S3Commands.hh @@ -37,7 +37,7 @@ class AmazonRequest : public HTTPRequest { const std::string &o, const std::string &style, int sv, XrdSysError &log) : HTTPRequest(s, log), accessKeyFile(akf), secretKeyFile(skf), - signatureVersion(sv), bucket(b), object(o), style(style) { + signatureVersion(sv), bucket(b), object(o), m_style(style) { requiresSignature = true; // Start off by parsing the hostUrl, which we use in 
conjunction with // the bucket to fill in the host (for setting host header). For @@ -99,7 +99,7 @@ class AmazonRequest : public HTTPRequest { std::string region; std::string service; - std::string style; + std::string m_style; private: bool createV4Signature(const std::string &payload, @@ -176,13 +176,18 @@ struct S3ObjectInfo { }; class AmazonS3List : public AmazonRequest { + using AmazonRequest::SendRequest; + public: - AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) - : AmazonRequest(ai, objectName, log) + AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, size_t maxKeys, XrdSysError &log) + : AmazonRequest(ai, objectName, log), m_maxKeys(maxKeys) {} virtual ~AmazonS3List() {} - bool SendRequest(const std::string &continuationToken, size_t max_keys=1000); + bool SendRequest(const std::string &continuationToken); bool Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg); + + private: + size_t m_maxKeys{1000}; }; diff --git a/src/S3Directory.cc b/src/S3Directory.cc index a148b73..85cef54 100644 --- a/src/S3Directory.cc +++ b/src/S3Directory.cc @@ -25,6 +25,8 @@ #include +#include + void S3Directory::Reset() { m_opened = false; m_ct = ""; @@ -37,7 +39,7 @@ void S3Directory::Reset() { } int S3Directory::ListS3Dir(const std::string &ct) { - AmazonS3List listCommand(m_ai, m_object, m_log); + AmazonS3List listCommand(m_ai, m_object, 1000, m_log); auto res = listCommand.SendRequest(ct); if (!res) { switch (listCommand.getResponseCode()) { @@ -59,6 +61,12 @@ int S3Directory::ListS3Dir(const std::string &ct) { "Failed to parse S3 results:", errMsg.c_str()); return -EIO; } + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + std::stringstream ss; + ss << "Directory listing returned " << m_objInfo.size() << " objects and " << m_commonPrefixes.size() << " prefixes"; + m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); + } + m_opened = true; return 0; } @@ -75,11 +83,23 
@@ int S3Directory::Opendir(const char *path, XrdOucEnv &env) { return rv; } - auto ai = m_fs.getS3AccessInfo(exposedPath); + auto ai = m_fs.getS3AccessInfo(exposedPath, object); if (!ai) { return -ENOENT; } + + if (ai->getS3BucketName().empty()) { + return -EINVAL; + } + m_ai = *ai; + // If the prefix is "foo" and there's an object "foo/bar", then + // the lookup only returns "foo/" (as it's the longest common prefix prior + // to a delimiter). Instead, we want to query for "foo/", which returns + // "foo/bar". + if (!object.empty() && (object[object.size() - 1] != '/')) { + object += "/"; + } m_object = object; return ListS3Dir(""); @@ -112,7 +132,10 @@ int S3Directory::Readdir(char *buff, int blen) { } else if (idx >= 0 && idx < static_cast(m_objInfo.size())) { m_idx++; std::string full_name = m_objInfo[idx].m_key; - full_name.erase(0, full_name.rfind("/")); + auto lastSlashIdx = full_name.rfind("/"); + if (lastSlashIdx != std::string::npos) { + full_name.erase(0, lastSlashIdx); + } trimslashes(full_name); strncpy(buff, full_name.c_str(), blen); if (buff[blen - 1] != '\0') { @@ -131,7 +154,7 @@ int S3Directory::Readdir(char *buff, int blen) { m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); - memset(&m_stat_buf, '\0', sizeof(struct stat)); + memset(m_stat_buf, '\0', sizeof(struct stat)); auto rv = ListS3Dir(m_ct); if (rv != 0) { m_opened = false; @@ -150,7 +173,7 @@ int S3Directory::Readdir(char *buff, int blen) { m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); - memset(&m_stat_buf, '\0', sizeof(struct stat)); + memset(m_stat_buf, '\0', sizeof(struct stat)); auto rv = ListS3Dir(m_ct); if (rv != 0) { m_opened = false; @@ -171,7 +194,10 @@ int S3Directory::Readdir(char *buff, int blen) { } std::string full_name = m_commonPrefixes[idx]; trimslashes(full_name); - full_name.erase(0, full_name.rfind("/")); + auto lastSlashIdx = full_name.rfind("/"); + if (lastSlashIdx != std::string::npos) { + full_name.erase(0, lastSlashIdx); + } 
trimslashes(full_name); strncpy(buff, full_name.c_str(), blen); if (buff[blen - 1] != '\0') { diff --git a/src/S3File.cc b/src/S3File.cc index 13a8060..e90e5ca 100644 --- a/src/S3File.cc +++ b/src/S3File.cc @@ -58,16 +58,21 @@ int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { if (rv != 0) { return rv; } - auto ai = m_oss->getS3AccessInfo(exposedPath); + auto ai = m_oss->getS3AccessInfo(exposedPath, object); if (!ai) { return -ENOENT; } + if (ai->getS3BucketName().empty()) { + return -EINVAL; + } + m_ai = *ai; + m_object = object; // This flag is not set when it's going to be a read operation // so we check if the file exists in order to be able to return a 404 if (!Oflag) { - AmazonS3Head head(m_ai, s3_object_name, m_log); + AmazonS3Head head(m_ai, m_object, m_log); if (!head.SendRequest()) { return -ENOENT; @@ -78,7 +83,7 @@ int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { } ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { - AmazonS3Download download(m_ai, s3_object_name, m_log); + AmazonS3Download download(m_ai, m_object, m_log); if (!download.SendRequest(offset, size)) { std::stringstream ss; @@ -94,7 +99,7 @@ ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { } int S3File::Fstat(struct stat *buff) { - AmazonS3Head head(m_ai, s3_object_name, m_log); + AmazonS3Head head(m_ai, m_object, m_log); if (!head.SendRequest()) { // SendRequest() returns false for all errors, including ones @@ -159,7 +164,7 @@ int S3File::Fstat(struct stat *buff) { } ssize_t S3File::Write(const void *buffer, off_t offset, size_t size) { - AmazonS3Upload upload(m_ai, s3_object_name, m_log); + AmazonS3Upload upload(m_ai, m_object, m_log); std::string payload((char *)buffer, size); if (!upload.SendRequest(payload, offset, size)) { diff --git a/src/S3File.hh b/src/S3File.hh index b930c3a..e42fcb1 100644 --- a/src/S3File.hh +++ b/src/S3File.hh @@ -98,7 +98,7 @@ class S3File : public XrdOssDF { XrdSysError &m_log; 
S3FileSystem *m_oss; - std::string s3_object_name; + std::string m_object; S3AccessInfo m_ai; size_t content_length; diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 10d510b..3951c45 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -75,13 +75,13 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { std::string value; std::string attribute; Config.Attach(cfgFD); - S3AccessInfo *newAccessInfo = new S3AccessInfo(); + std::shared_ptr newAccessInfo(new S3AccessInfo()); std::string exposedPath; while ((temporary = Config.GetMyFirstWord())) { attribute = temporary; temporary = Config.GetWord(); if (attribute == "s3.end") { - s3_access_map[exposedPath] = newAccessInfo; + m_s3_access_map[exposedPath] = newAccessInfo; if (newAccessInfo->getS3ServiceName().empty()) { m_log.Emsg("Config", "s3.service_name not specified"); return false; @@ -90,7 +90,7 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { m_log.Emsg("Config", "s3.region not specified"); return false; } - newAccessInfo = new S3AccessInfo(); + newAccessInfo.reset(new S3AccessInfo()); exposedPath = ""; continue; } @@ -151,18 +151,20 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { newAccessInfo->setS3SecretKeyFile(value); else if (attribute == "s3.service_url") newAccessInfo->setS3ServiceUrl(value); - else if (attribute == "s3.url_style") - this->s3_url_style = value; + else if (attribute == "s3.url_style") { + s3_url_style = value; + newAccessInfo->setS3UrlStyle(s3_url_style); + } } - if (this->s3_url_style.empty()) { + if (s3_url_style.empty()) { m_log.Emsg("Config", "s3.url_style not specified"); return false; } else { // We want this to be case-insensitive. - toLower(this->s3_url_style); + toLower(s3_url_style); } - if (this->s3_url_style != "virtual" && this->s3_url_style != "path") { + if (s3_url_style != "virtual" && s3_url_style != "path") { m_log.Emsg( "Config", "invalid s3.url_style specified. 
Must be 'virtual' or 'path'"); @@ -199,30 +201,44 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, if (rv != 0) { return rv; } - auto ai = getS3AccessInfo(exposedPath); + auto ai = getS3AccessInfo(exposedPath, object); if (!ai) { return -ENOENT; } + if (ai->getS3BucketName().empty()) { + return -EINVAL; + } trimslashes(object); - AmazonS3List listCommand(*ai, object, m_log); + AmazonS3List listCommand(*ai, object, 1, m_log); auto res = listCommand.SendRequest(""); if (!res) { - if (m_log.getMsgMask() & XrdHTTPServer::Info) { - std::stringstream ss; - ss << "Failed to stat path " << path << "; response code " - << listCommand.getResponseCode(); - m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); - } - switch (listCommand.getResponseCode()) { - case 404: - return -ENOENT; - case 500: - return -EIO; - case 403: - return -EPERM; - default: + auto httpCode = listCommand.getResponseCode(); + if (httpCode == 0) { + if (m_log.getMsgMask() & XrdHTTPServer::Info) { + std::stringstream ss; + ss << "Failed to stat path " << path << "; error: " << listCommand.getErrorMessage() + << " (code=" << listCommand.getErrorCode() << ")"; + m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); + } return -EIO; + } else { + if (m_log.getMsgMask() & XrdHTTPServer::Info) { + std::stringstream ss; + ss << "Failed to stat path " << path << "; response code " + << httpCode; + m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); + } + switch (httpCode) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } } } @@ -236,6 +252,23 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, "Failed to parse S3 results:", errMsg.c_str()); return -EIO; } + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + std::stringstream ss; + ss << "Stat on object returned " << objInfo.size() << " objects and " << commonPrefixes.size() << " prefixes"; + m_log.Log(XrdHTTPServer::Debug, "Stat", 
ss.str().c_str()); + } + + if (object.empty()) { + buff->st_mode = 0700 | S_IFDIR; + buff->st_nlink = 0; + buff->st_uid = 1; + buff->st_gid = 1; + buff->st_size = 4096; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; + return 0; + } bool foundObj = false; size_t objSize = 0; @@ -337,3 +370,26 @@ int S3FileSystem::parsePath(const char *fullPath, std::string &exposedPath, return 0; } + +const std::shared_ptr +S3FileSystem::getS3AccessInfo(const std::string &exposedPath, std::string &object) const { + auto ai = m_s3_access_map.at(exposedPath); + if (!ai) { + return ai; + } + if (ai->getS3BucketName().empty()) { + // Bucket name is embedded in the "object" name. Split it into the bucket and + // "real" object. + std::shared_ptr aiCopy(new S3AccessInfo(*ai)); + auto firstSlashIdx = object.find('/'); + if (firstSlashIdx == std::string::npos) { + aiCopy->setS3BucketName(object); + object = ""; + } else { + aiCopy->setS3BucketName(object.substr(0, firstSlashIdx)); + object = object.substr(firstSlashIdx + 1); + } + return aiCopy; + } + return ai; +} diff --git a/src/S3FileSystem.hh b/src/S3FileSystem.hh index 41d70f3..f449ede 100644 --- a/src/S3FileSystem.hh +++ b/src/S3FileSystem.hh @@ -109,34 +109,32 @@ class S3FileSystem : public XrdOss { int parsePath(const char *fullPath, std::string &exposedPath, std::string &object) const; bool exposedPathExists(const std::string &exposedPath) const { - return s3_access_map.count(exposedPath) > 0; + return m_s3_access_map.count(exposedPath) > 0; } const std::string &getS3ServiceName(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3ServiceName(); + return m_s3_access_map.at(exposedPath)->getS3ServiceName(); } const std::string &getS3Region(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3Region(); + return m_s3_access_map.at(exposedPath)->getS3Region(); } const std::string &getS3ServiceURL(const std::string 
&exposedPath) const { - return s3_access_map.at(exposedPath)->getS3ServiceUrl(); + return m_s3_access_map.at(exposedPath)->getS3ServiceUrl(); } const std::string &getS3BucketName(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3BucketName(); + return m_s3_access_map.at(exposedPath)->getS3BucketName(); } const std::string & getS3AccessKeyFile(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3AccessKeyFile(); + return m_s3_access_map.at(exposedPath)->getS3AccessKeyFile(); } const std::string & getS3SecretKeyFile(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath)->getS3SecretKeyFile(); + return m_s3_access_map.at(exposedPath)->getS3SecretKeyFile(); } const std::string &getS3URLStyle() const { return s3_url_style; } - const S3AccessInfo * - getS3AccessInfo(const std::string &exposedPath) const { - return s3_access_map.at(exposedPath); - } + const std::shared_ptr + getS3AccessInfo(const std::string &exposedPath, std::string &object) const; private: XrdOucEnv *m_env; @@ -144,6 +142,6 @@ class S3FileSystem : public XrdOss { bool handle_required_config(const char *desired_name, const std::string &source); - std::map s3_access_map; + std::map> m_s3_access_map; std::string s3_url_style; }; From 2952a85964b30450ff951e2fd94dbc574309da44 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 14:43:50 -0500 Subject: [PATCH 04/14] Remove commented-out line --- src/HTTPCommands.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index 7b2073e..999ca8f 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -67,8 +67,6 @@ HTTPRequest::~HTTPRequest() {} if (rv##B != CURLE_OK) { \ this->errorCode = "E_CURL_LIB"; \ this->errorMessage = "curl_easy_setopt( " #B " ) failed."; \ - /* dprintf( D_ALWAYS, "curl_easy_setopt( %s ) failed (%d): '%s', \ - failing.\n", #B, rv##B, curl_easy_strerror( rv##B ) ); */ \ return false; \ } \ } 
From eea5ff105d52e16e785198a38118b394f2afc808 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 16:52:20 -0500 Subject: [PATCH 05/14] Run clang-format across new code --- src/HTTPCommands.hh | 4 ++-- src/S3AccessInfo.hh | 2 +- src/S3Commands.cc | 24 +++++++++++++----------- src/S3Commands.hh | 45 +++++++++++++++++++++++++-------------------- src/S3Directory.cc | 5 +++-- src/S3Directory.hh | 4 +--- src/S3FileSystem.cc | 17 ++++++++++------- src/S3FileSystem.hh | 11 ++++++----- 8 files changed, 61 insertions(+), 51 deletions(-) diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index a3a0c69..cc78c33 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -47,8 +47,8 @@ class HTTPRequest { virtual bool SendHTTPRequest(const std::string &payload); unsigned long getResponseCode() const { return responseCode; } - const std::string & getErrorCode() const {return errorCode;} - const std::string & getErrorMessage() const {return errorMessage;} + const std::string &getErrorCode() const { return errorCode; } + const std::string &getErrorMessage() const { return errorMessage; } const std::string &getResultString() const { return resultString; } // Currently only used in PUTS, but potentially useful elsewhere diff --git a/src/S3AccessInfo.hh b/src/S3AccessInfo.hh index b45e112..cf73eec 100644 --- a/src/S3AccessInfo.hh +++ b/src/S3AccessInfo.hh @@ -36,7 +36,7 @@ class S3AccessInfo { void setS3UrlStyle(const std::string &s3UrlStyle); - const int getS3SignatureVersion() const {return 4;} + const int getS3SignatureVersion() const { return 4; } private: std::string s3_bucket_name; diff --git a/src/S3Commands.cc b/src/S3Commands.cc index 7398b2e..bb28928 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -75,9 +75,10 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, // :// and the last / host = substring(url, hostStartIdx); // Likewise, the path is going to be /bucket/object - // Sometimes we intentionally configure 
the plugin with no bucket because we - // assume the incoming object request already encodes the bucket. This is used - // for exporting many buckets from a single endpoint. + // Sometimes we intentionally configure the plugin with no bucket + // because we assume the incoming object request already encodes the + // bucket. This is used for exporting many buckets from a single + // endpoint. if (bucket.empty()) { path = "/" + object; bucket_path = "/" + object.substr(0, object.find('/')); @@ -506,19 +507,20 @@ bool AmazonS3List::SendRequest(const std::string &continuationToken) { // Parse the results of the AWS directory listing // -// S3 returns an XML structure for directory listings so we must pick it apart and -// convert it to `objInfo` and `commonPrefixes`. The `objInfo` is a list of objects -// that match the current prefix but don't have a subsequent `/` in the object name. -// The `commonPrefixes` are the unique prefixes of other objects that have the same -// prefix as the original query but also have an `/`. +// S3 returns an XML structure for directory listings so we must pick it apart +// and convert it to `objInfo` and `commonPrefixes`. The `objInfo` is a list of +// objects that match the current prefix but don't have a subsequent `/` in the +// object name. The `commonPrefixes` are the unique prefixes of other objects +// that have the same prefix as the original query but also have an `/`. // // Example. Suppose we have the following objects in the bucket: // - /foo/bar.txt // - /foo/bar/example.txt // - /foo/baz/example.txt -// Then, a query to list with prefix `/foo/` would return object info for `/foo/bar.txt` -// while the common prefixes would be `/foo/bar/` and `/foo/baz`. Note this is quite -// close to returning a list of files in a directory and a list of sub-directories. +// Then, a query to list with prefix `/foo/` would return object info for +// `/foo/bar.txt` while the common prefixes would be `/foo/bar/` and `/foo/baz`. 
+// Note this is quite close to returning a list of files in a directory and a +// list of sub-directories. bool AmazonS3List::Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg) { diff --git a/src/S3Commands.hh b/src/S3Commands.hh index 10e7c72..6d4eddc 100644 --- a/src/S3Commands.hh +++ b/src/S3Commands.hh @@ -18,19 +18,20 @@ #pragma once -#include "S3AccessInfo.hh" #include "HTTPCommands.hh" +#include "S3AccessInfo.hh" #include #include class AmazonRequest : public HTTPRequest { public: - AmazonRequest(const S3AccessInfo &ai, const std::string objectName, XrdSysError &log) + AmazonRequest(const S3AccessInfo &ai, const std::string objectName, + XrdSysError &log) : AmazonRequest(ai.getS3ServiceUrl(), ai.getS3AccessKeyFile(), - ai.getS3SecretKeyFile(), ai.getS3BucketName(), - objectName, ai.getS3UrlStyle(), ai.getS3SignatureVersion(), log) - {} + ai.getS3SecretKeyFile(), ai.getS3BucketName(), + objectName, ai.getS3UrlStyle(), + ai.getS3SignatureVersion(), log) {} AmazonRequest(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, @@ -75,7 +76,8 @@ class AmazonRequest : public HTTPRequest { virtual const std::string *getAccessKey() const { return &accessKeyFile; } virtual const std::string *getSecretKey() const { return &secretKeyFile; } - bool parseURL(const std::string &url, std::string &bucket_path, std::string &path); + bool parseURL(const std::string &url, std::string &bucket_path, + std::string &path); virtual bool SendRequest(); virtual bool SendS3Request(const std::string &payload); @@ -90,7 +92,8 @@ class AmazonRequest : public HTTPRequest { std::string host; std::string canonicalURI; - std::string bucketPath; // Path to use for bucket-level operations (such as listings). May be empty for DNS-style buckets + std::string bucketPath; // Path to use for bucket-level operations (such as + // listings). 
May be empty for DNS-style buckets std::string canonicalQueryString; std::string bucket; @@ -113,9 +116,9 @@ class AmazonS3Upload : public AmazonRequest { using AmazonRequest::SendRequest; public: - AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) - : AmazonRequest(ai, objectName, log) - {} + AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, + XrdSysError &log) + : AmazonRequest(ai, objectName, log) {} AmazonS3Upload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, @@ -136,9 +139,9 @@ class AmazonS3Download : public AmazonRequest { using AmazonRequest::SendRequest; public: - AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) - : AmazonRequest(ai, objectName, log) - {} + AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, + XrdSysError &log) + : AmazonRequest(ai, objectName, log) {} AmazonS3Download(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, @@ -155,9 +158,9 @@ class AmazonS3Head : public AmazonRequest { using AmazonRequest::SendRequest; public: - AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) - : AmazonRequest(ai, objectName, log) - {} + AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, + XrdSysError &log) + : AmazonRequest(ai, objectName, log) {} AmazonS3Head(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, @@ -179,14 +182,16 @@ class AmazonS3List : public AmazonRequest { using AmazonRequest::SendRequest; public: - AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, size_t maxKeys, XrdSysError &log) - : AmazonRequest(ai, objectName, log), m_maxKeys(maxKeys) - {} + AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, + size_t maxKeys, XrdSysError &log) + : AmazonRequest(ai, objectName, log), m_maxKeys(maxKeys) {} virtual 
~AmazonS3List() {} bool SendRequest(const std::string &continuationToken); - bool Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg); + bool Results(std::vector &objInfo, + std::vector &commonPrefixes, std::string &ct, + std::string &errMsg); private: size_t m_maxKeys{1000}; diff --git a/src/S3Directory.cc b/src/S3Directory.cc index 85cef54..5d8defa 100644 --- a/src/S3Directory.cc +++ b/src/S3Directory.cc @@ -61,9 +61,10 @@ int S3Directory::ListS3Dir(const std::string &ct) { "Failed to parse S3 results:", errMsg.c_str()); return -EIO; } - if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { std::stringstream ss; - ss << "Directory listing returned " << m_objInfo.size() << " objects and " << m_commonPrefixes.size() << " prefixes"; + ss << "Directory listing returned " << m_objInfo.size() + << " objects and " << m_commonPrefixes.size() << " prefixes"; m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); } diff --git a/src/S3Directory.hh b/src/S3Directory.hh index 910b288..b15207d 100644 --- a/src/S3Directory.hh +++ b/src/S3Directory.hh @@ -30,9 +30,7 @@ class XrdSysError; class S3Directory : public HTTPDirectory { public: S3Directory(XrdSysError &log, const S3FileSystem &fs) - : HTTPDirectory(log), - m_fs(fs) - {} + : HTTPDirectory(log), m_fs(fs) {} virtual ~S3Directory() {} diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 3951c45..d1af33a 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -217,8 +217,9 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, if (httpCode == 0) { if (m_log.getMsgMask() & XrdHTTPServer::Info) { std::stringstream ss; - ss << "Failed to stat path " << path << "; error: " << listCommand.getErrorMessage() - << " (code=" << listCommand.getErrorCode() << ")"; + ss << "Failed to stat path " << path + << "; error: " << listCommand.getErrorMessage() + << " (code=" << listCommand.getErrorCode() << ")"; 
m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); } return -EIO; @@ -226,7 +227,7 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, if (m_log.getMsgMask() & XrdHTTPServer::Info) { std::stringstream ss; ss << "Failed to stat path " << path << "; response code " - << httpCode; + << httpCode; m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); } switch (httpCode) { @@ -254,7 +255,8 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, } if (m_log.getMsgMask() & XrdHTTPServer::Debug) { std::stringstream ss; - ss << "Stat on object returned " << objInfo.size() << " objects and " << commonPrefixes.size() << " prefixes"; + ss << "Stat on object returned " << objInfo.size() << " objects and " + << commonPrefixes.size() << " prefixes"; m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); } @@ -372,14 +374,15 @@ int S3FileSystem::parsePath(const char *fullPath, std::string &exposedPath, } const std::shared_ptr -S3FileSystem::getS3AccessInfo(const std::string &exposedPath, std::string &object) const { +S3FileSystem::getS3AccessInfo(const std::string &exposedPath, + std::string &object) const { auto ai = m_s3_access_map.at(exposedPath); if (!ai) { return ai; } if (ai->getS3BucketName().empty()) { - // Bucket name is embedded in the "object" name. Split it into the bucket and - // "real" object. + // Bucket name is embedded in the "object" name. Split it into the + // bucket and "real" object. std::shared_ptr aiCopy(new S3AccessInfo(*ai)); auto firstSlashIdx = object.find('/'); if (firstSlashIdx == std::string::npos) { diff --git a/src/S3FileSystem.hh b/src/S3FileSystem.hh index f449ede..95c639e 100644 --- a/src/S3FileSystem.hh +++ b/src/S3FileSystem.hh @@ -101,12 +101,13 @@ class S3FileSystem : public XrdOss { return nullptr; } - // Given a path as seen by XRootD, split it into the configured prefix and the object - // within the prefix. 
+ // Given a path as seen by XRootD, split it into the configured prefix and + // the object within the prefix. // - // The returned `exposedPath` can be later used with the `get*` functions to fetch - // the required S3 configuration. - int parsePath(const char *fullPath, std::string &exposedPath, std::string &object) const; + // The returned `exposedPath` can be later used with the `get*` functions to + // fetch the required S3 configuration. + int parsePath(const char *fullPath, std::string &exposedPath, + std::string &object) const; bool exposedPathExists(const std::string &exposedPath) const { return m_s3_access_map.count(exposedPath) > 0; From dcd1f881f001cc9e921dbb3cb2aa444585a3f4a9 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:19:27 -0500 Subject: [PATCH 06/14] Fix CMake for gtest on Mac --- CMakeLists.txt | 25 +++++++++++++------------ test/CMakeLists.txt | 9 +++------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 43ff635..e806342 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,8 +65,8 @@ target_link_libraries(XrdHTTPServer -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB # The CMake documentation strongly advises against using these macros; instead, the pkg_check_modules # is supposed to fill out the full path to ${LIBCRYPTO_LIBRARIES}. As of cmake 3.26.1, this does not # appear to be the case on Mac OS X. Remove once no longer necessary! 
-target_link_directories(XrdS3 PRIVATE ${LIBCRYPTO_LIBRARY_DIRS}) -target_link_directories(XrdHTTPServer PRIVATE ${LIBCRYPTO_LIBRARY_DIRS}) +target_link_directories(XrdS3 PUBLIC ${LIBCRYPTO_LIBRARY_DIRS}) +target_link_directories(XrdHTTPServer PUBLIC ${LIBCRYPTO_LIBRARY_DIRS}) if(NOT APPLE) set_target_properties(XrdS3 PROPERTIES OUTPUT_NAME "XrdS3-${XROOTD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols") @@ -84,16 +84,17 @@ install( ) if( XROOTD_PLUGINS_BUILD_UNITTESTS ) - if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) -include(ExternalProject) -ExternalProject_Add(gtest - PREFIX external/gtest - URL ${CMAKE_CURRENT_SOURCE_DIR}/vendor/gtest - INSTALL_COMMAND : -) -endif() -enable_testing() -add_subdirectory(test) + if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) + include(ExternalProject) + ExternalProject_Add(gtest + PREFIX external/gtest + URL ${CMAKE_CURRENT_SOURCE_DIR}/vendor/gtest + BUILD_BYPRODUCTS external/gtest/src/gtest-build/lib/libgtest.a + INSTALL_COMMAND : + ) + endif() + enable_testing() + add_subdirectory(test) endif() #install( diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7f5cd1a..9e81ab3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,15 +21,12 @@ add_executable( http-gtest http_tests.cc ) -if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) - add_dependencies(s3-gtest gtest) - add_dependencies(http-gtest gtest) - include_directories("${PROJECT_SOURCE_DIR}/vendor/gtest/googletest/include") -endif() - if(XROOTD_PLUGINS_EXTERNAL_GTEST) set(LIBGTEST "gtest") else() + add_dependencies(s3-gtest gtest) + add_dependencies(http-gtest gtest) + include_directories("${PROJECT_SOURCE_DIR}/vendor/gtest/googletest/include") set(LIBGTEST "${CMAKE_BINARY_DIR}/external/gtest/src/gtest-build/lib/libgtest.a") endif() From f3a9d8bd0aa2f1a697827402bde00b831ef7c008 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:20:14 -0500 Subject: [PATCH 07/14] Small bugfixes found during 
testing --- src/S3Directory.cc | 12 +++++++++--- src/S3FileSystem.cc | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/S3Directory.cc b/src/S3Directory.cc index 5d8defa..31375b5 100644 --- a/src/S3Directory.cc +++ b/src/S3Directory.cc @@ -111,7 +111,9 @@ int S3Directory::Readdir(char *buff, int blen) { return -EBADF; } - memset(m_stat_buf, '\0', sizeof(struct stat)); + if (m_stat_buf) { + memset(m_stat_buf, '\0', sizeof(struct stat)); + } // m_idx encodes the location inside the current directory. // - m_idx in [0, m_objInfo.size) means return a "file" from the object @@ -155,7 +157,9 @@ int S3Directory::Readdir(char *buff, int blen) { m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); - memset(m_stat_buf, '\0', sizeof(struct stat)); + if (m_stat_buf) { + memset(m_stat_buf, '\0', sizeof(struct stat)); + } auto rv = ListS3Dir(m_ct); if (rv != 0) { m_opened = false; @@ -174,7 +178,9 @@ int S3Directory::Readdir(char *buff, int blen) { m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); - memset(m_stat_buf, '\0', sizeof(struct stat)); + if (m_stat_buf) { + memset(m_stat_buf, '\0', sizeof(struct stat)); + } auto rv = ListS3Dir(m_ct); if (rv != 0) { m_opened = false; diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 9623421..5a33a6e 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -219,6 +219,7 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, } auto ai = getS3AccessInfo(exposedPath, object); if (!ai) { + m_log.Log(XrdHTTPServer::Info, "Stat", "Prefix not configured for Stat"); return -ENOENT; } if (ai->getS3BucketName().empty()) { From 3dd0caccfea7f9c80eef53442613f1fa0cb3d486 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:20:32 -0500 Subject: [PATCH 08/14] Add helper function for writing files --- src/shortfile.cc | 30 ++++++++++++++++++++++++++++++ src/shortfile.hh | 2 ++ 2 files changed, 32 insertions(+) diff --git a/src/shortfile.cc b/src/shortfile.cc index 
ce6b022..a2ac97a 100644 --- a/src/shortfile.cc +++ b/src/shortfile.cc @@ -83,3 +83,33 @@ bool readShortFile(const std::string &fileName, std::string &contents) { return true; } + +bool writeShortFile(const std::string &fileName, std::string &contents, int flags) { + int fd = open(fileName.c_str(), O_WRONLY | flags, 0600); + if (fd < 0) { + return false; + } + + auto ptr = &contents[0]; + ssize_t nwrite; + auto nleft = contents.size(); + + while (nleft > 0) { + REISSUE_WRITE: + nwrite = write(fd, ptr, nleft); + if (nwrite < 0) { + /* error happened, ignore if EINTR, otherwise inform the caller */ + if (errno == EINTR) { + goto REISSUE_WRITE; + } + close(fd); + return false; + } + + nleft -= nwrite; + ptr += nwrite; + } + + close(fd); + return true; +} diff --git a/src/shortfile.hh b/src/shortfile.hh index 34af1e8..1d87eb2 100644 --- a/src/shortfile.hh +++ b/src/shortfile.hh @@ -21,3 +21,5 @@ #include bool readShortFile(const std::string &fileName, std::string &contents); + +bool writeShortFile(const std::string &fileName, std::string &contents, int flags); From 3b99873d7c0604c447390165093a2aa7d390b00a Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:22:18 -0500 Subject: [PATCH 09/14] Add simple unit test coverage for directory listing For now, this hardcodes a test bucket in OpenData in AWS; while it is unfortunate to rely on an external service, it helps bootstrap the testing process. 
--- test/s3_tests.cc | 211 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) diff --git a/test/s3_tests.cc b/test/s3_tests.cc index 8b9fdc6..078705f 100644 --- a/test/s3_tests.cc +++ b/test/s3_tests.cc @@ -17,7 +17,10 @@ ***************************************************************/ #include "../src/S3Commands.hh" +#include "../src/S3FileSystem.hh" +#include "../src/shortfile.hh" +#include #include #include #include @@ -62,6 +65,214 @@ TEST(TestS3URLGeneration, Test1) { ASSERT_EQ(generatedHostUrl, "https://s3-service.com:443/test-object"); } +class FileSystemFixtureBase : public testing::Test { + protected: + FileSystemFixtureBase() + : m_log(new XrdSysLogger(2, 0)) // Log to stderr, no log rotation + {} + + void SetUp() override { + setenv("XRDINSTANCE", "xrootd", 1); + char tmp_configfn[] = "/tmp/xrootd-s3-gtest.cfg.XXXXXX"; + auto result = mkstemp(tmp_configfn); + ASSERT_NE(result, -1) << "Failed to create temp file (" + << strerror(errno) << ", errno=" << errno << ")"; + m_configfn = std::string(tmp_configfn); + + auto contents = GetConfig(); + ASSERT_FALSE(contents.empty()); + ASSERT_TRUE(writeShortFile(m_configfn, contents, 0)) + << "Failed to write to temp file (" << strerror(errno) + << ", errno=" << errno << ")"; + } + + void TearDown() override { + if (!m_configfn.empty()) { + auto rv = unlink(m_configfn.c_str()); + ASSERT_EQ(rv, 0) << "Failed to delete temp file (" + << strerror(errno) << ", errno=" << errno << ")"; + } + } + + virtual std::string GetConfig() = 0; + + std::string m_configfn; + std::unique_ptr m_log; +}; + +class FileSystemS3VirtualBucket : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.bucket_name genome-browser +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style virtual +s3.end +)"; + } +}; + +class FileSystemS3VirtualNoBucket : public 
FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style virtual +s3.end +)"; + } +}; + +class FileSystemS3PathBucket : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.bucket_name genome-browser +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style path +s3.end +)"; + } +}; + +class FileSystemS3PathNoBucket : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.service_url https://s3.us-east-1.amazonaws.com +s3.url_style path +s3.end +)"; + } +}; + +void TestDirectoryContents(S3FileSystem &fs, const std::string &dirname) { + std::unique_ptr dir(fs.newDir()); + ASSERT_TRUE(dir); + + XrdOucEnv env; + auto rv = dir->Opendir(dirname.c_str(), env); + ASSERT_EQ(rv, 0); + + struct stat buf; + ASSERT_EQ(dir->StatRet(&buf), 0); + + std::vector name; + name.resize(255); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "cellbrowser.json.bak"); + ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG); + ASSERT_EQ(buf.st_size, 672); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "dataset.json"); + ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG); + ASSERT_EQ(buf.st_size, 1847); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "desc.json"); + ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG); + ASSERT_EQ(buf.st_size, 1091); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "all"); + ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR); + + rv = dir->Readdir(&name[0], 255); 
+ ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "by-organ"); + ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), "func-compart"); + ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR); + + rv = dir->Readdir(&name[0], 255); + ASSERT_EQ(rv, 0); + ASSERT_EQ(std::string(&name[0]), ""); + + ASSERT_EQ(dir->Close(), 0); +} + +TEST_F(FileSystemS3VirtualBucket, Create) { + EXPECT_NO_THROW( + { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); }); +} + +TEST_F(FileSystemS3VirtualBucket, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3VirtualBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); +} + +TEST_F(FileSystemS3VirtualNoBucket, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat( + "/test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak", + &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3VirtualNoBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens"); +} + +TEST_F(FileSystemS3PathBucket, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3PathBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); +} + +TEST_F(FileSystemS3PathNoBucket, Stat) { + S3FileSystem 
fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat( + "/test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak", + &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3PathNoBucket, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens/"); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); From 73af163b89c412a26d3b36cb805fb793edb60129 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:23:31 -0500 Subject: [PATCH 10/14] Fixups from clang-format --- src/S3FileSystem.cc | 3 ++- src/shortfile.cc | 3 ++- src/shortfile.hh | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 5a33a6e..e78f501 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -219,7 +219,8 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, } auto ai = getS3AccessInfo(exposedPath, object); if (!ai) { - m_log.Log(XrdHTTPServer::Info, "Stat", "Prefix not configured for Stat"); + m_log.Log(XrdHTTPServer::Info, "Stat", + "Prefix not configured for Stat"); return -ENOENT; } if (ai->getS3BucketName().empty()) { diff --git a/src/shortfile.cc b/src/shortfile.cc index a2ac97a..257c4ee 100644 --- a/src/shortfile.cc +++ b/src/shortfile.cc @@ -84,7 +84,8 @@ bool readShortFile(const std::string &fileName, std::string &contents) { return true; } -bool writeShortFile(const std::string &fileName, std::string &contents, int flags) { +bool writeShortFile(const std::string &fileName, std::string &contents, + int flags) { int fd = open(fileName.c_str(), O_WRONLY | flags, 0600); if (fd < 0) { return false; diff --git a/src/shortfile.hh b/src/shortfile.hh index 1d87eb2..5c5f81d 100644 --- a/src/shortfile.hh +++ b/src/shortfile.hh @@ -22,4 +22,5 @@ bool readShortFile(const 
std::string &fileName, std::string &contents); -bool writeShortFile(const std::string &fileName, std::string &contents, int flags); +bool writeShortFile(const std::string &fileName, std::string &contents, + int flags); From ffcf9cacbc91c0bceb851360a278321bb91369e2 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:31:28 -0500 Subject: [PATCH 11/14] Fix integer signedness issue on GCC --- test/s3_tests.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/s3_tests.cc b/test/s3_tests.cc index 078705f..40c5164 100644 --- a/test/s3_tests.cc +++ b/test/s3_tests.cc @@ -179,35 +179,35 @@ void TestDirectoryContents(S3FileSystem &fs, const std::string &dirname) { rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "cellbrowser.json.bak"); - ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG); + ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 672); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "dataset.json"); - ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG); + ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 1847); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "desc.json"); - ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG); + ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 1091); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "all"); - ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR); + ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "by-organ"); - ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR); + ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "func-compart"); - ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR); + ASSERT_EQ(buf.st_mode & 
S_IFDIR, static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); From 5b21249478a339f199b687b0a00dd87c3fe0facf Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:33:29 -0500 Subject: [PATCH 12/14] Fix clang-format issues --- test/s3_tests.cc | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/test/s3_tests.cc b/test/s3_tests.cc index 40c5164..0711c3b 100644 --- a/test/s3_tests.cc +++ b/test/s3_tests.cc @@ -179,35 +179,41 @@ void TestDirectoryContents(S3FileSystem &fs, const std::string &dirname) { rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "cellbrowser.json.bak"); - ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); + ASSERT_EQ(buf.st_mode & S_IFREG, + static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 672); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "dataset.json"); - ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); + ASSERT_EQ(buf.st_mode & S_IFREG, + static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 1847); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "desc.json"); - ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); + ASSERT_EQ(buf.st_mode & S_IFREG, + static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 1091); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "all"); - ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); + ASSERT_EQ(buf.st_mode & S_IFDIR, + static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "by-organ"); - ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); + ASSERT_EQ(buf.st_mode & S_IFDIR, + static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "func-compart"); - ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); + ASSERT_EQ(buf.st_mode & S_IFDIR, + static_cast(S_IFDIR)); rv = 
dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); From 11f76cd2df103a734f6c73e2803592f1ab85b20b Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:38:39 -0500 Subject: [PATCH 13/14] Add pre-commit hook to prevent clang-format issues --- .pre-commit-config.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3d80c59 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.6 + hooks: + - id: clang-format From 772a60e42d17edd97528e848c979941dfe2592d5 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Sat, 8 Jun 2024 20:39:30 -0500 Subject: [PATCH 14/14] Fixups from first run of pre-commit --- .github/workflows/linter.yml | 5 ++--- .github/workflows/test.yml | 12 ++++++------ CMakeLists.txt | 2 +- README.md | 2 +- rpm/xrootd-s3-http.spec | 1 - test/CMakeLists.txt | 2 +- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 535250d..c41d18b 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -9,7 +9,7 @@ name: Lint # If the linter fails, the PR can still be completed, but none of the linter changes will be made. 
-on: +on: push: branches: - main @@ -31,7 +31,7 @@ jobs: - name: Check out repository (push) if: ${{ github.event_name == 'push' }} uses: actions/checkout@v3 - + - name: Check out repository (pull_request_target) if: ${{ github.event_name == 'pull_request_target' }} uses: actions/checkout@v3 @@ -57,4 +57,3 @@ jobs: uses: reviewdog/action-suggester@v1 with: tool_name: lint - \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 098289b..b0391b6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,10 +2,10 @@ name: Test on: pull_request: - branches: + branches: - main push: - branches: + branches: - main env: @@ -29,7 +29,7 @@ jobs: - name: install deps run: | sudo apt update && sudo apt-get install -y cmake libcurl4-openssl-dev libcurl4 pkg-config libssl-dev libxrootd-dev libxrootd-server-dev libgtest-dev - + - name: Create Build Environment # Some projects don't allow in-source building, so create a separate build directory # We'll use this as our working directory for all subsequent commands @@ -40,8 +40,8 @@ jobs: # access regardless of the host operating system shell: bash working-directory: ${{runner.workspace}}/build - # Note the current convention is to use the -S and -B options here to specify source - # and build directories, but this is only available with CMake 3.13 and higher. + # Note the current convention is to use the -S and -B options here to specify source + # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DXROOTD_PLUGINS_BUILD_UNITTESTS=yes -DXROOTD_PLUGINS_EXTERNAL_GTEST=${{ matrix.external-gtest }} @@ -54,6 +54,6 @@ jobs: - name: Test working-directory: ${{runner.workspace}}/build shell: bash - # Execute tests defined by the CMake configuration. + # Execute tests defined by the CMake configuration. 
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail run: ctest -C $BUILD_TYPE --verbose diff --git a/CMakeLists.txt b/CMakeLists.txt index e806342..2f5ea1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ endmacro(use_cxx17) use_cxx17() if( CMAKE_COMPILER_IS_GNUCXX ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror" ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror" ) endif() if(NOT APPLE) diff --git a/README.md b/README.md index c4ff82c..6df385d 100644 --- a/README.md +++ b/README.md @@ -161,4 +161,4 @@ In a separate terminal, run ``` curl -v http://localhost:1094// -``` \ No newline at end of file +``` diff --git a/rpm/xrootd-s3-http.spec b/rpm/xrootd-s3-http.spec index 297dac1..092ced9 100644 --- a/rpm/xrootd-s3-http.spec +++ b/rpm/xrootd-s3-http.spec @@ -52,4 +52,3 @@ make install DESTDIR=$RPM_BUILD_ROOT * Tue Dec 06 2022 Brian Bockelman - 0.0.1-1 - Initial, "Hello world" version of the S3 filesystem plugin - diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9e81ab3..f06ebe7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,7 +7,7 @@ add_executable( s3-gtest s3_tests.cc ../src/S3FileSystem.cc ../src/shortfile.cc ../src/stl_string_utils.cc - ../src/HTTPCommands.cc + ../src/HTTPCommands.cc ../src/S3Commands.cc )