diff --git a/CMakeLists.txt b/CMakeLists.txt index b29f2e9..43ff635 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,12 +47,19 @@ if(NOT APPLE) SET( CMAKE_MODULE_LINKER_FLAGS "-Wl,--no-undefined") endif() +if( NOT XROOTD_EXTERNAL_TINYXML2 ) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_subdirectory(vendor/tinyxml2) +else() + find_package(tinyxml2::tinyxml2) +endif() + include_directories(${XROOTD_INCLUDES} ${CURL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS}) -add_library(XrdS3 SHARED src/S3File.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdS3 SHARED src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) add_library(XrdHTTPServer SHARED src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) -target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES}) +target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES} tinyxml2::tinyxml2) target_link_libraries(XrdHTTPServer -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES}) # The CMake documentation strongly advises against using these macros; instead, the pkg_check_modules diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index 2b3fa51..7b2073e 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -168,6 +168,8 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, const std::string &uri, const std::string &payload) { + m_log.Log(XrdHTTPServer::Debug, "SendRequest", "Sending HTTP request", + uri.c_str()); CURLcode rv = curl_global_init(CURL_GLOBAL_ALL); if (rv != 0) { this->errorCode = "E_CURL_LIB"; @@ -317,20 +319,6 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, CAFile = x509_ca_file; } - if (CAPath.empty()) { - char *soap_ssl_ca_dir = getenv("GAHP_SSL_CADIR"); - if (soap_ssl_ca_dir != NULL) { - CAPath = soap_ssl_ca_dir; - } - } - - if (CAFile.empty()) { - char *soap_ssl_ca_file = getenv("GAHP_SSL_CAFILE"); - if (soap_ssl_ca_file != NULL) { - CAFile = soap_ssl_ca_file; - } - } - if (!CAPath.empty()) { SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_CAPATH, CAPath.c_str()); } diff --git a/src/S3AccessInfo.cc b/src/S3AccessInfo.cc index 8664833..d0351a1 100644 --- a/src/S3AccessInfo.cc +++ b/src/S3AccessInfo.cc @@ -49,3 +49,9 @@ const std::string &S3AccessInfo::getS3SecretKeyFile() const { void S3AccessInfo::setS3SecretKeyFile(const std::string &s3SecretKeyFile) { s3_secret_key_file = s3SecretKeyFile; } + +const std::string &S3AccessInfo::getS3UrlStyle() const { return s3_url_style; } + +void S3AccessInfo::setS3UrlStyle(const std::string &s3UrlStyle) { + s3_url_style = s3UrlStyle; +} diff --git a/src/S3AccessInfo.hh b/src/S3AccessInfo.hh index 2770ee0..b45e112 100644 --- a/src/S3AccessInfo.hh +++ b/src/S3AccessInfo.hh @@ -2,8 +2,7 @@ // Created by Rich Wellner on 2/29/24. 
// -#ifndef XROOTD_S3_HTTP_S3ACCESSINFO_HH -#define XROOTD_S3_HTTP_S3ACCESSINFO_HH +#pragma once #include @@ -33,6 +32,12 @@ class S3AccessInfo { void setS3SecretKeyFile(const std::string &s3SecretKeyFile); + const std::string &getS3UrlStyle() const; + + void setS3UrlStyle(const std::string &s3UrlStyle); + + const int getS3SignatureVersion() const {return 4;} + private: std::string s3_bucket_name; std::string s3_service_name; @@ -40,6 +45,5 @@ class S3AccessInfo { std::string s3_service_url; std::string s3_access_key_file; std::string s3_secret_key_file; + std::string s3_url_style; }; - -#endif // XROOTD_S3_HTTP_S3ACCESSINFO_HH diff --git a/src/S3Commands.cc b/src/S3Commands.cc index dc12889..ce50a58 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -44,8 +45,6 @@ bool AmazonRequest::SendRequest() { default: this->errorCode = "E_INTERNAL"; this->errorMessage = "Invalid signature version."; - // dprintf( D_ALWAYS, "Invalid signature version (%d), failing.\n", - // signatureVersion ); return false; } } @@ -57,7 +56,8 @@ std::string AmazonRequest::canonicalizeQueryString() { // Takes in the configured `s3.service_url` and uses the bucket/object requested // to generate the host URL, as well as the canonical URI (which is the path to // the object). -bool AmazonRequest::parseURL(const std::string &url, std::string &path) { +bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, + std::string &path) { auto i = url.find("://"); if (i == std::string::npos) { return false; @@ -76,8 +76,10 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &path) { // for exporting many buckets from a single endpoint. if (bucket.empty()) { path = "/" + object; + bucket_path = "/" + object.substr(0, object.find('/')); } else { path = "/" + bucket + "/" + object; + bucket_path = bucket; } } else { // In virtual-style requests, the host should be determined as @@ -85,6 +87,7 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &path) { // :// up until the last /, but with appended to the front. host = bucket + "." + substring(url, i + 3); path = "/" + object; + bucket_path = "/"; } return true; @@ -136,6 +139,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload, } trim(saKey); } else { + canonicalQueryString = canonicalizeQueryString(); + requiresSignature = false; // If we don't create a signature, it must not be needed... return true; // If there was no saKey, we need not generate a signature @@ -176,14 +181,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload, canonicalURI = pathEncode(canonicalURI); // The canonical query string is the alphabetically sorted list of - // URI-encoded parameter names '=' values, separated by '&'s. That - // wouldn't be hard to do, but we don't need to, since we send - // everything in the POST body, instead. - std::string canonicalQueryString; - - // This function doesn't (currently) support query parameters, - // but no current caller attempts to use them. - assert((httpVerb != "GET") || query_parameters.size() == 0); + // URI-encoded parameter names '=' values, separated by '&'s. + canonicalQueryString = canonicalizeQueryString(); // The canonical headers must include the Host header, so add that // now if we don't have it. 
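// [Illustrative sketch, not part of the patch] The canonical query string
// described above can be built roughly as follows. This assumes the request's
// query parameters live in a std::map (which keeps keys alphabetically
// sorted) and are already URI-encoded by the caller, as AmazonS3List::SendRequest()
// does via urlquote(); the helper name below is hypothetical.
#include <map>
#include <string>

static std::string canonicalQueryStringFrom(
    const std::map<std::string, std::string> &params) {
    std::string result;
    for (const auto &[name, value] : params) {
        if (!result.empty()) {
            result += "&";
        }
        result += name + "=" + value; // values already URI-encoded
    }
    return result;
}
// e.g. {delimiter:"/", list-type:"2", max-keys:"1000", prefix:"foo%2F"}
// yields "delimiter=/&list-type=2&max-keys=1000&prefix=foo%2F"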
@@ -209,7 +208,6 @@ bool AmazonRequest::createV4Signature(const std::string &payload, if (!doSha256(payload, messageDigest, &mdLength)) { this->errorCode = "E_INTERNAL"; this->errorMessage = "Unable to hash payload."; - // dprintf( D_ALWAYS, "Unable to hash payload, failing.\n" ); return false; } std::string payloadHash; @@ -396,10 +394,6 @@ bool AmazonRequest::sendV4Request(const std::string &payload, return false; } - if (!sendContentSHA) { - // dprintf( D_FULLDEBUG, "Payload is '%s'\n", payload.c_str() ); - } - std::string authorizationValue; if (!createV4Signature(payload, authorizationValue, sendContentSHA)) { if (this->errorCode.empty()) { @@ -417,7 +411,12 @@ bool AmazonRequest::sendV4Request(const std::string &payload, headers["Authorization"] = authorizationValue; } - return sendPreparedRequest(protocol, hostUrl, payload); + // This operation is on the bucket itself; alter the URL + auto url = hostUrl; + if (!canonicalQueryString.empty()) { + url += "?" + canonicalQueryString; + } + return sendPreparedRequest(protocol, url, payload); } // It's stated in the API documentation that you can upload to any region @@ -484,3 +483,85 @@ bool AmazonS3Head::SendRequest() { } // --------------------------------------------------------------------------- + +bool AmazonS3List::SendRequest(const std::string &continuationToken, + size_t max_keys) { + query_parameters["list-type"] = "2"; + query_parameters["delimiter"] = "/"; + query_parameters["prefix"] = urlquote(object); + if (!continuationToken.empty()) { + query_parameters["continuation-token"] = urlquote(continuationToken); + } + query_parameters["max-keys"] = std::to_string(max_keys); + httpVerb = "GET"; + + // Operation is on the bucket itself; alter the URL to remove the object + hostUrl = protocol + "://" + host + bucketPath; + + return SendS3Request(""); +} + +bool AmazonS3List::Results(std::vector &objInfo, + std::vector &commonPrefixes, + std::string &ct, std::string &errMsg) { + tinyxml2::XMLDocument doc; + auto err = doc.Parse(resultString.c_str()); + if (err != tinyxml2::XML_SUCCESS) { + errMsg = doc.ErrorStr(); + return false; + } + + auto elem = doc.RootElement(); + if (strcmp(elem->Name(), "ListBucketResult")) { + errMsg = "S3 ListBucket response is not rooted with ListBucketResult " + "element"; + return false; + } + + for (auto child = elem->FirstChildElement(); child != nullptr; + child = child->NextSiblingElement()) { + if (!strcmp(child->Name(), "CommonPrefixes")) { + auto prefix = child->FirstChildElement("Prefix"); + if (prefix != nullptr) { + auto prefixChar = prefix->GetText(); + if (prefixChar != nullptr) { + auto prefixStr = std::string(prefixChar); + trim(prefixStr); + if (!prefixStr.empty()) { + commonPrefixes.emplace_back(prefixStr); + } + } + } + } else if (!strcmp(child->Name(), "Contents")) { + std::string keyStr; + int64_t size; + bool goodSize = false; + auto key = child->FirstChildElement("Key"); + if (key != nullptr) { + auto keyChar = key->GetText(); + if (keyChar != nullptr) { + keyStr = std::string(keyChar); + trim(keyStr); + } + } + auto sizeElem = child->FirstChildElement("Size"); + if (sizeElem != nullptr) { + goodSize = + (sizeElem->QueryInt64Text(&size) == tinyxml2::XML_SUCCESS); + } + if (goodSize && !keyStr.empty()) { + S3ObjectInfo obj; + obj.m_key = keyStr; + obj.m_size = size; + objInfo.emplace_back(obj); + } + } else if (!strcmp(child->Name(), "NextContinuationToken")) { + auto ctChar = child->GetText(); + if (ctChar) { + ct = ctChar; + trim(ct); + } + } + } + return true; +} diff --git 
a/src/S3Commands.hh b/src/S3Commands.hh index dee72ee..542ed54 100644 --- a/src/S3Commands.hh +++ b/src/S3Commands.hh @@ -18,12 +18,20 @@ #pragma once +#include "S3AccessInfo.hh" #include "HTTPCommands.hh" #include +#include class AmazonRequest : public HTTPRequest { public: + AmazonRequest(const S3AccessInfo &ai, const std::string objectName, XrdSysError &log) + : AmazonRequest(ai.getS3ServiceUrl(), ai.getS3AccessKeyFile(), + ai.getS3SecretKeyFile(), ai.getS3BucketName(), + objectName, ai.getS3UrlStyle(), ai.getS3SignatureVersion(), log) + {} + AmazonRequest(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, int sv, @@ -37,7 +45,7 @@ class AmazonRequest : public HTTPRequest { // "https://my-url.com:443", the bucket is "my-bucket", and the object // is "my-object", then the host will be "my-bucket.my-url.com:443" and // the canonicalURI will be "/my-object". - if (!parseURL(hostUrl, canonicalURI)) { + if (!parseURL(hostUrl, bucketPath, canonicalURI)) { errorCode = "E_INVALID_SERVICE_URL"; errorMessage = "Failed to parse host and canonicalURI from service URL."; @@ -67,7 +75,7 @@ class AmazonRequest : public HTTPRequest { virtual const std::string *getAccessKey() const { return &accessKeyFile; } virtual const std::string *getSecretKey() const { return &secretKeyFile; } - bool parseURL(const std::string &url, std::string &path); + bool parseURL(const std::string &url, std::string &bucket_path, std::string &path); virtual bool SendRequest(); virtual bool SendS3Request(const std::string &payload); @@ -82,6 +90,8 @@ class AmazonRequest : public HTTPRequest { std::string host; std::string canonicalURI; + std::string bucketPath; // Path to use for bucket-level operations (such as listings). 
May be empty for DNS-style buckets + std::string canonicalQueryString; std::string bucket; std::string object; @@ -103,6 +113,10 @@ class AmazonS3Upload : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + AmazonS3Upload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -122,6 +136,10 @@ class AmazonS3Download : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + AmazonS3Download(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -137,6 +155,10 @@ class AmazonS3Head : public AmazonRequest { using AmazonRequest::SendRequest; public: + AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + AmazonS3Head(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, @@ -147,3 +169,20 @@ class AmazonS3Head : public AmazonRequest { virtual bool SendRequest(); }; + +struct S3ObjectInfo { + size_t m_size; + std::string m_key; +}; + +class AmazonS3List : public AmazonRequest { + public: + AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) + : AmazonRequest(ai, objectName, log) + {} + + virtual ~AmazonS3List() {} + + bool SendRequest(const std::string &continuationToken, size_t max_keys=1000); + bool Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg); +}; diff --git a/src/S3Directory.cc b/src/S3Directory.cc new file mode 100644 index 0000000..76112a8 --- /dev/null +++ b/src/S3Directory.cc @@ -0,0 +1,202 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************/ + +#include "S3Directory.hh" +#include "S3Commands.hh" +#include "logging.hh" +#include "stl_string_utils.hh" + +#include + +#include + +void S3Directory::Reset() { + m_opened = false; + m_ct = ""; + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + m_stat_buf = nullptr; + m_ai = S3AccessInfo(); + m_object = ""; +} + +int S3Directory::ListS3Dir(const std::string &ct) { + AmazonS3List listCommand(m_ai, m_object, m_log); + auto res = listCommand.SendRequest(ct); + if (!res) { + switch (listCommand.getResponseCode()) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } + } + std::string errMsg; + m_idx = 0; + res = listCommand.Results(m_objInfo, m_commonPrefixes, m_ct, errMsg); + if (!res) { + m_log.Log(XrdHTTPServer::Warning, "Opendir", + "Failed to parse S3 results:", errMsg.c_str()); + return -EIO; + } + m_opened = true; + return 0; +} + +int S3Directory::Opendir(const char *path, XrdOucEnv &env) { + if (m_opened) { + return -EBADF; + } + Reset(); + + std::string exposedPath, object; + int rv = m_fs.parsePath(path, exposedPath, object); + if (rv != 0) { + return rv; + } + + auto ai = m_fs.getS3AccessInfo(exposedPath); + if (!ai) { + return -ENOENT; + } + m_ai = *ai; + m_object = object; + + return ListS3Dir(""); +} + +int S3Directory::Readdir(char *buff, int blen) { + if (!m_opened) { + return -EBADF; + } + + memset(m_stat_buf, '\0', sizeof(struct stat)); + + // m_idx encodes the location inside the current directory. + // - m_idx in [0, m_objInfo.size) means return a "file" from the object + // list. + // - m_idx == m_objectInfo.size means return the first entry in the + // directory/common prefix list. + // - m_idx in (m_commonPrefixes.size, -1] means return an entry from the + // common prefix list. + // - m_idx == -m_commonPrefixes.size means that all the path elements have + // been consumed. + // + // If all the paths entry have been consumed, then if the continuation token + // is set, list more objects in the bucket. If it's unset, then we've + // iterated through all the bucket contents. + auto idx = m_idx; + if (m_objInfo.empty() && m_commonPrefixes.empty()) { + *buff = '\0'; + return XrdOssOK; + } else if (idx >= 0 && idx < static_cast(m_objInfo.size())) { + m_idx++; + std::string full_name = m_objInfo[idx].m_key; + full_name.erase(0, full_name.rfind("/")); + trimslashes(full_name); + strncpy(buff, full_name.c_str(), blen); + if (buff[blen - 1] != '\0') { + buff[blen - 1] = '\0'; + return -ENOMEM; + } + if (m_stat_buf) { + m_stat_buf->st_mode = 0x0600 | S_IFREG; + m_stat_buf->st_nlink = 1; + m_stat_buf->st_size = m_objInfo[idx].m_size; + } + } else if (idx < 0 && + -idx == static_cast(m_commonPrefixes.size())) { + if (!m_ct.empty()) { + // Get the next set of results from S3. + m_idx = 0; + m_objInfo.clear(); + m_commonPrefixes.clear(); + memset(&m_stat_buf, '\0', sizeof(struct stat)); + auto rv = ListS3Dir(m_ct); + if (rv != 0) { + m_opened = false; + return rv; + } + // Recurse to parse the fresh results. 
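// [Illustrative sketch, not part of the patch] ListS3Dir() above drives
// AmazonS3List one page at a time; a standalone caller could paginate the
// same way using the interfaces added in S3Commands.hh. `ai`, `log`, and
// the prefix string below are placeholders.
std::string token, errMsg;
while (true) {
    AmazonS3List list(ai, "some/prefix/", log);
    if (!list.SendRequest(token)) {
        break; // list.getResponseCode() distinguishes 403/404/5xx
    }
    std::vector<S3ObjectInfo> objects;
    std::vector<std::string> prefixes;
    token.clear(); // Results() only sets it when another page exists
    if (!list.Results(objects, prefixes, token, errMsg)) {
        break; // XML parse failure; errMsg carries the tinyxml2 error
    }
    for (const auto &obj : objects) {
        // obj.m_key is the object name, obj.m_size its size in bytes
    }
    for (const auto &prefix : prefixes) {
        // each common prefix corresponds to a "subdirectory"
    }
    if (token.empty()) {
        break; // no NextContinuationToken -> listing finished
    }
}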
+ return Readdir(buff, blen); + } + *buff = '\0'; + return XrdOssOK; + } else if (idx == static_cast(m_objInfo.size()) || + -idx < static_cast(m_commonPrefixes.size())) { + if (m_commonPrefixes.empty()) { + *buff = '\0'; + return XrdOssOK; + } + if (idx == static_cast(m_objInfo.size())) { + m_idx = -1; + idx = 0; + } else { + idx = -m_idx; + m_idx--; + } + std::string full_name = m_commonPrefixes[idx]; + trimslashes(full_name); + full_name.erase(0, full_name.rfind("/")); + trimslashes(full_name); + strncpy(buff, full_name.c_str(), blen); + if (buff[blen - 1] != '\0') { + buff[blen - 1] = '\0'; + return -ENOMEM; + } + if (m_stat_buf) { + m_stat_buf->st_mode = 0x0700 | S_IFDIR; + m_stat_buf->st_nlink = 0; + m_stat_buf->st_size = 4096; + } + } else { + return -EBADF; + } + + if (m_stat_buf) { + m_stat_buf->st_uid = 1; + m_stat_buf->st_gid = 1; + m_stat_buf->st_mtime = m_stat_buf->st_ctime = m_stat_buf->st_atime = 0; + m_stat_buf->st_dev = 0; + m_stat_buf->st_ino = 1; // If both st_dev and st_ino are 0, then XRootD + // interprets that as an unavailable file. + } + return XrdOssOK; +} + +int S3Directory::StatRet(struct stat *buf) { + if (!m_opened) { + return -EBADF; + } + + m_stat_buf = buf; + return XrdOssOK; +} + +int S3Directory::Close(long long *retsz) { + if (!m_opened) { + return -EBADF; + } + Reset(); + return XrdOssOK; +} diff --git a/src/S3Directory.hh b/src/S3Directory.hh index 3287eda..910b288 100644 --- a/src/S3Directory.hh +++ b/src/S3Directory.hh @@ -19,27 +19,43 @@ #pragma once #include "HTTPDirectory.hh" +#include "S3Commands.hh" +#include "S3FileSystem.hh" -// Leaving in duplicate definitions for now. It remains -// to be seen if we'll need to change these and have specific -// behaviors for either HTTP or S3 variants in the future. +#include +#include + +class XrdSysError; class S3Directory : public HTTPDirectory { public: - S3Directory(XrdSysError &log) - : HTTPDirectory(log) - // m_log(log) + S3Directory(XrdSysError &log, const S3FileSystem &fs) + : HTTPDirectory(log), + m_fs(fs) {} virtual ~S3Directory() {} - virtual int Opendir(const char *path, XrdOucEnv &env) override { - return -ENOSYS; - } + virtual int Opendir(const char *path, XrdOucEnv &env) override; + + int Readdir(char *buff, int blen) override; + + int StatRet(struct stat *statStruct) override; - int Readdir(char *buff, int blen) override { return -ENOSYS; } + int Close(long long *retsz = 0) override; - int StatRet(struct stat *statStruct) override { return -ENOSYS; } + private: + void Reset(); + int ListS3Dir(const std::string &ct); - int Close(long long *retsz = 0) override { return -ENOSYS; } + bool m_opened{false}; + ssize_t m_idx{0}; + std::vector m_objInfo; + std::vector m_commonPrefixes; + std::string m_prefix; + std::string m_ct; + std::string m_object; + const S3FileSystem &m_fs; + S3AccessInfo m_ai; + struct stat *m_stat_buf{nullptr}; }; diff --git a/src/S3File.cc b/src/S3File.cc index 6b7ffce..13a8060 100644 --- a/src/S3File.cc +++ b/src/S3File.cc @@ -48,81 +48,26 @@ XrdVERSIONINFO(XrdOssGetFileSystem, S3); S3File::S3File(XrdSysError &log, S3FileSystem *oss) : m_log(log), m_oss(oss), content_length(0), last_modified(0) {} -int parse_path(const S3FileSystem &fs, const char *fullPath, - std::string &exposedPath, std::string &object) { - // - // Check the path for validity. - // - std::filesystem::path p(fullPath); - auto pathComponents = p.begin(); - - // Iterate through components of the fullPath until we either find a match - // or we've reached the end of the path. 
- std::filesystem::path currentPath = *pathComponents; - while (pathComponents != p.end()) { - if (fs.exposedPathExists(currentPath.string())) { - exposedPath = currentPath.string(); - break; - } - ++pathComponents; - if (pathComponents != p.end()) { - currentPath /= *pathComponents; - } else { - return -ENOENT; - } - } - - // Objects names may contain path separators. - ++pathComponents; - if (pathComponents == p.end()) { - return -ENOENT; - } - - std::filesystem::path objectPath = *pathComponents++; - for (; pathComponents != p.end(); ++pathComponents) { - objectPath /= (*pathComponents); +int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { + if (m_log.getMsgMask() & XrdHTTPServer::Debug) { + m_log.Log(LogMask::Warning, "S3File::Open", "Opening file", path); } - object = objectPath.string(); - - fprintf(stderr, "object = %s\n", object.c_str()); - - return 0; -} -int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { std::string exposedPath, object; - int rv = parse_path(*m_oss, path, exposedPath, object); + auto rv = m_oss->parsePath(path, exposedPath, object); if (rv != 0) { return rv; } - if (!m_oss->exposedPathExists(exposedPath)) + auto ai = m_oss->getS3AccessInfo(exposedPath); + if (!ai) { return -ENOENT; - - std::string configured_s3_region = m_oss->getS3Region(exposedPath); - std::string configured_s3_service_url = m_oss->getS3ServiceURL(exposedPath); - std::string configured_s3_access_key = - m_oss->getS3AccessKeyFile(exposedPath); - std::string configured_s3_secret_key = - m_oss->getS3SecretKeyFile(exposedPath); - std::string configured_s3_bucket_name = m_oss->getS3BucketName(exposedPath); - std::string configured_s3_url_style = m_oss->getS3URLStyle(); - - // We used to query S3 here to see if the object existed, but of course - // if you're creating a file on upload, you don't care. 
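// [Illustrative sketch, not part of the patch] With the S3AccessInfo-based
// constructors introduced in S3Commands.hh, the existence check reduces to
// the pattern below (as S3File::Open() now does); `ai`, `log`, and the
// object name are placeholders.
AmazonS3Head head(ai, "path/to/object", log);
if (!head.SendRequest()) {
    // SendRequest() returns false for every failure, so any error
    // (including a 404) is surfaced to the caller as "not found".
    return -ENOENT;
}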
- - this->s3_object_name = object; - this->s3_bucket_name = configured_s3_bucket_name; - this->s3_service_url = configured_s3_service_url; - this->s3_access_key = configured_s3_access_key; - this->s3_secret_key = configured_s3_secret_key; - this->s3_url_style = configured_s3_url_style; + } + m_ai = *ai; // This flag is not set when it's going to be a read operation // so we check if the file exists in order to be able to return a 404 if (!Oflag) { - AmazonS3Head head(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Head head(m_ai, s3_object_name, m_log); if (!head.SendRequest()) { return -ENOENT; @@ -133,9 +78,7 @@ int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { } ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { - AmazonS3Download download(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Download download(m_ai, s3_object_name, m_log); if (!download.SendRequest(offset, size)) { std::stringstream ss; @@ -151,9 +94,7 @@ ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { } int S3File::Fstat(struct stat *buff) { - AmazonS3Head head(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Head head(m_ai, s3_object_name, m_log); if (!head.SendRequest()) { // SendRequest() returns false for all errors, including ones @@ -218,9 +159,7 @@ int S3File::Fstat(struct stat *buff) { } ssize_t S3File::Write(const void *buffer, off_t offset, size_t size) { - AmazonS3Upload upload(this->s3_service_url, this->s3_access_key, - this->s3_secret_key, this->s3_bucket_name, - this->s3_object_name, this->s3_url_style, m_log); + AmazonS3Upload upload(m_ai, s3_object_name, m_log); std::string payload((char *)buffer, size); if (!upload.SendRequest(payload, offset, size)) { diff --git a/src/S3File.hh b/src/S3File.hh index 33a3244..b930c3a 100644 --- a/src/S3File.hh +++ b/src/S3File.hh @@ -98,12 +98,8 @@ class S3File : public XrdOssDF { XrdSysError &m_log; S3FileSystem *m_oss; - std::string s3_service_url; - std::string s3_bucket_name; std::string s3_object_name; - std::string s3_access_key; - std::string s3_secret_key; - std::string s3_url_style; + S3AccessInfo m_ai; size_t content_length; time_t last_modified; diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 3a4e939..10d510b 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -20,6 +20,7 @@ #include "S3AccessInfo.hh" #include "S3Directory.hh" #include "S3File.hh" +#include "logging.hh" #include "stl_string_utils.hh" #include @@ -27,7 +28,9 @@ #include #include +#include #include +#include #include #include @@ -180,7 +183,7 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { // Object Allocation Functions // XrdOssDF *S3FileSystem::newDir(const char *user) { - return new S3Directory(m_log); + return new S3Directory(m_log, *this); } XrdOssDF *S3FileSystem::newFile(const char *user) { @@ -189,23 +192,91 @@ XrdOssDF *S3FileSystem::newFile(const char *user) { int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { - std::string error; + m_log.Log(XrdHTTPServer::Debug, "Stat", "Stat'ing path", path); - m_log.Emsg("Stat", "Stat'ing path", path); + std::string exposedPath, object; + auto rv = parsePath(path, exposedPath, object); + if (rv != 0) { + return rv; 
+ } + auto ai = getS3AccessInfo(exposedPath); + if (!ai) { + return -ENOENT; + } - S3File s3file(m_log, this); - int rv = s3file.Open(path, 0, (mode_t)0, *env); - if (rv) { - m_log.Emsg("Stat", "Failed to open path:", path); + trimslashes(object); + AmazonS3List listCommand(*ai, object, m_log); + auto res = listCommand.SendRequest(""); + if (!res) { + if (m_log.getMsgMask() & XrdHTTPServer::Info) { + std::stringstream ss; + ss << "Failed to stat path " << path << "; response code " + << listCommand.getResponseCode(); + m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); + } + switch (listCommand.getResponseCode()) { + case 404: + return -ENOENT; + case 500: + return -EIO; + case 403: + return -EPERM; + default: + return -EIO; + } } - // Assume that S3File::FStat() doesn't write to buff unless it succeeds. - rv = s3file.Fstat(buff); - if (rv != 0) { - formatstr(error, "File %s not found.", path); - m_log.Emsg("Stat", error.c_str()); + + std::string errMsg; + std::vector objInfo; + std::vector commonPrefixes; + std::string ct; + res = listCommand.Results(objInfo, commonPrefixes, ct, errMsg); + if (!res) { + m_log.Log(XrdHTTPServer::Warning, "Stat", + "Failed to parse S3 results:", errMsg.c_str()); + return -EIO; + } + + bool foundObj = false; + size_t objSize = 0; + for (const auto &obj : objInfo) { + if (obj.m_key == object) { + foundObj = true; + objSize = obj.m_size; + break; + } + } + if (foundObj) { + buff->st_mode = 0600 | S_IFREG; + buff->st_nlink = 1; + buff->st_uid = buff->st_gid = 1; + buff->st_size = objSize; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; + return 0; + } + + auto desiredPrefix = object + "/"; + bool foundPrefix = false; + for (const auto &prefix : commonPrefixes) { + if (prefix == desiredPrefix) { + foundPrefix = true; + break; + } + } + if (!foundPrefix) { return -ENOENT; } + buff->st_mode = 0700 | S_IFDIR; + buff->st_nlink = 0; + buff->st_uid = 1; + buff->st_gid = 1; + buff->st_size = 4096; + buff->st_mtime = buff->st_atime = buff->st_ctime = 0; + buff->st_dev = 0; + buff->st_ino = 1; return 0; } @@ -213,7 +284,7 @@ int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts) { // Is path valid? std::string exposedPath, object; - int rv = parse_path(*this, path, exposedPath, object); + int rv = parsePath(path, exposedPath, object); if (rv != 0) { return rv; } @@ -227,3 +298,42 @@ int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, return 0; } + +int S3FileSystem::parsePath(const char *fullPath, std::string &exposedPath, + std::string &object) const { + // + // Check the path for validity. + // + std::filesystem::path p(fullPath); + auto pathComponents = p.begin(); + + // Iterate through components of the fullPath until we either find a match + // or we've reached the end of the path. + std::filesystem::path currentPath = *pathComponents; + while (pathComponents != p.end()) { + if (exposedPathExists(currentPath.string())) { + exposedPath = currentPath.string(); + break; + } + ++pathComponents; + if (pathComponents != p.end()) { + currentPath /= *pathComponents; + } else { + return -ENOENT; + } + } + + // Objects names may contain path separators. 
+ ++pathComponents; + if (pathComponents == p.end()) { + return -ENOENT; + } + + std::filesystem::path objectPath = *pathComponents++; + for (; pathComponents != p.end(); ++pathComponents) { + objectPath /= (*pathComponents); + } + object = objectPath.string(); + + return 0; +} diff --git a/src/S3FileSystem.hh b/src/S3FileSystem.hh index 5cdee6a..41d70f3 100644 --- a/src/S3FileSystem.hh +++ b/src/S3FileSystem.hh @@ -101,6 +101,13 @@ class S3FileSystem : public XrdOss { return nullptr; } + // Given a path as seen by XRootD, split it into the configured prefix and the object + // within the prefix. + // + // The returned `exposedPath` can be later used with the `get*` functions to fetch + // the required S3 configuration. + int parsePath(const char *fullPath, std::string &exposedPath, std::string &object) const; + bool exposedPathExists(const std::string &exposedPath) const { return s3_access_map.count(exposedPath) > 0; } @@ -126,6 +133,11 @@ class S3FileSystem : public XrdOss { } const std::string &getS3URLStyle() const { return s3_url_style; } + const S3AccessInfo * + getS3AccessInfo(const std::string &exposedPath) const { + return s3_access_map.at(exposedPath); + } + private: XrdOucEnv *m_env; XrdSysError m_log; diff --git a/src/stl_string_utils.cc b/src/stl_string_utils.cc index 4623b6e..9c3e3dc 100644 --- a/src/stl_string_utils.cc +++ b/src/stl_string_utils.cc @@ -146,3 +146,40 @@ int formatstr_cat(std::string &s, const char *format, ...) { va_end(args); return r; } + +std::string urlquote(const std::string input) { + std::string output; + output.reserve(3 * input.size()); + for (char val : input) { + if ((val >= 48 || val <= 57) || // Digits 0-9 + (val >= 65 || val <= 90) || // Uppercase A-Z + (val >= 97 || val <= 122) || // Lowercase a-z + (val == 95 || val == 46 || val == 45 || val == 126 || + val == 47)) // '_.-~/' + { + output += val; + } else { + output += "%" + std::to_string(val); + } + } + return output; +} + +void trimslashes(std::string &path) { + if (path.empty()) { + return; + } + size_t begin = 0; + while (begin < path.length() && (path[begin] == '/')) { + ++begin; + } + + auto end = path.length() - 1; + while (end >= 0 && end >= begin && (path[end] == '/')) { + --end; + } + + if (begin != 0 || end != (path.length()) - 1) { + path = path.substr(begin, (end - begin) + 1); + } +} diff --git a/src/stl_string_utils.hh b/src/stl_string_utils.hh index 9222f52..d119e4e 100644 --- a/src/stl_string_utils.hh +++ b/src/stl_string_utils.hh @@ -37,3 +37,19 @@ int formatstr(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); int formatstr_cat(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); + +// Given an input string, quote it to a form that is safe +// for embedding in a URL query parameter. 
+//
+// Letters, digits, and the characters '_.-~/' are never
+// quoted; otherwise, the byte is represented with its percent-encoded
+// ASCII representation (e.g., ' ' becomes %20)
+std::string urlquote(const std::string input);
+
+// Trim the slash(es) from a given object name
+//
+// foo/bar/ -> foo/bar
+// bar/baz -> bar/baz
+// foo/bar/// -> foo/bar
+// /a/b -> a/b
+void trimslashes(std::string &path);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 438758c..7f5cd1a 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -2,6 +2,7 @@ add_executable( s3-gtest s3_tests.cc
   ../src/AWSv4-impl.cc
   ../src/logging.cc
   ../src/S3AccessInfo.cc
+  ../src/S3Directory.cc
   ../src/S3File.cc
   ../src/S3FileSystem.cc
   ../src/shortfile.cc
diff --git a/vendor/tinyxml2 b/vendor/tinyxml2
new file mode 160000
index 0000000..312a809
--- /dev/null
+++ b/vendor/tinyxml2
@@ -0,0 +1 @@
+Subproject commit 312a8092245df393db14a0b2427457ed2ba75e1b
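// [Illustrative sketch, not part of the patch] The documented behaviour of
// the new string helpers, exercised with plain asserts; the expected values
// come straight from the comments in stl_string_utils.hh.
#include "stl_string_utils.hh"

#include <cassert>
#include <string>

void stringHelperExamples() {
    std::string path = "foo/bar///";
    trimslashes(path); // trailing slashes removed
    assert(path == "foo/bar");

    path = "/a/b";
    trimslashes(path); // leading slash removed
    assert(path == "a/b");

    // Letters, digits, and '_.-~/' pass through urlquote() unchanged.
    assert(urlquote("list/type-2") == "list/type-2");
}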