Skip to content

Commit

Permalink
Implement directory listings for S3 class
Browse files Browse the repository at this point in the history
Adds a new request type, list, and interprets the results as a
directory listing.
  • Loading branch information
bbockelm committed May 23, 2024
1 parent 9403474 commit 2ec6cca
Show file tree
Hide file tree
Showing 16 changed files with 597 additions and 142 deletions.
11 changes: 9 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,19 @@ if(NOT APPLE)
SET( CMAKE_MODULE_LINKER_FLAGS "-Wl,--no-undefined")
endif()

# Select the tinyxml2 provider: the vendored copy under vendor/tinyxml2 by
# default, or a system-installed package when XROOTD_EXTERNAL_TINYXML2 is set.
if( NOT XROOTD_EXTERNAL_TINYXML2 )
  # XrdS3 is a shared library that links tinyxml2, so the vendored copy must
  # be compiled as position-independent code.
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  add_subdirectory(vendor/tinyxml2)
else()
  # find_package() takes the *package* name; `tinyxml2::tinyxml2` is the
  # imported target that the package provides.  REQUIRED makes a missing
  # package fail at configure time rather than later at link time.
  find_package(tinyxml2 REQUIRED)
endif()

include_directories(${XROOTD_INCLUDES} ${CURL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS})

add_library(XrdS3 SHARED src/S3File.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc)
add_library(XrdS3 SHARED src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc)
add_library(XrdHTTPServer SHARED src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc)

target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES})
target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES} tinyxml2::tinyxml2)
target_link_libraries(XrdHTTPServer -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES})

# The CMake documentation strongly advises against using these macros; instead, the pkg_check_modules
Expand Down
16 changes: 2 additions & 14 deletions src/HTTPCommands.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol,
const std::string &uri,
const std::string &payload) {

m_log.Log(XrdHTTPServer::Debug, "SendRequest", "Sending HTTP request",
uri.c_str());
CURLcode rv = curl_global_init(CURL_GLOBAL_ALL);
if (rv != 0) {
this->errorCode = "E_CURL_LIB";
Expand Down Expand Up @@ -317,20 +319,6 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol,
CAFile = x509_ca_file;
}

if (CAPath.empty()) {
char *soap_ssl_ca_dir = getenv("GAHP_SSL_CADIR");
if (soap_ssl_ca_dir != NULL) {
CAPath = soap_ssl_ca_dir;
}
}

if (CAFile.empty()) {
char *soap_ssl_ca_file = getenv("GAHP_SSL_CAFILE");
if (soap_ssl_ca_file != NULL) {
CAFile = soap_ssl_ca_file;
}
}

if (!CAPath.empty()) {
SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_CAPATH, CAPath.c_str());
}
Expand Down
6 changes: 6 additions & 0 deletions src/S3AccessInfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,9 @@ const std::string &S3AccessInfo::getS3SecretKeyFile() const {
void S3AccessInfo::setS3SecretKeyFile(const std::string &s3SecretKeyFile) {
s3_secret_key_file = s3SecretKeyFile;
}

const std::string &S3AccessInfo::getS3UrlStyle() const { return s3_url_style; }

void S3AccessInfo::setS3UrlStyle(const std::string &s3UrlStyle) {
s3_url_style = s3UrlStyle;
}
12 changes: 8 additions & 4 deletions src/S3AccessInfo.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
// Created by Rich Wellner on 2/29/24.
//

#ifndef XROOTD_S3_HTTP_S3ACCESSINFO_HH
#define XROOTD_S3_HTTP_S3ACCESSINFO_HH
#pragma once

#include <string>

Expand Down Expand Up @@ -33,13 +32,18 @@ class S3AccessInfo {

void setS3SecretKeyFile(const std::string &s3SecretKeyFile);

const std::string &getS3UrlStyle() const;

void setS3UrlStyle(const std::string &s3UrlStyle);

const int getS3SignatureVersion() const {return 4;}

private:
std::string s3_bucket_name;
std::string s3_service_name;
std::string s3_region;
std::string s3_service_url;
std::string s3_access_key_file;
std::string s3_secret_key_file;
std::string s3_url_style;
};

#endif // XROOTD_S3_HTTP_S3ACCESSINFO_HH
115 changes: 98 additions & 17 deletions src/S3Commands.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <XrdSys/XrdSysError.hh>
#include <curl/curl.h>
#include <openssl/hmac.h>
#include <tinyxml2.h>

#include <algorithm>
#include <cassert>
Expand All @@ -44,8 +45,6 @@ bool AmazonRequest::SendRequest() {
default:
this->errorCode = "E_INTERNAL";
this->errorMessage = "Invalid signature version.";
// dprintf( D_ALWAYS, "Invalid signature version (%d), failing.\n",
// signatureVersion );
return false;
}
}
Expand All @@ -57,7 +56,8 @@ std::string AmazonRequest::canonicalizeQueryString() {
// Takes in the configured `s3.service_url` and uses the bucket/object requested
// to generate the host URL, as well as the canonical URI (which is the path to
// the object).
bool AmazonRequest::parseURL(const std::string &url, std::string &path) {
bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path,
std::string &path) {
auto i = url.find("://");
if (i == std::string::npos) {
return false;
Expand All @@ -76,15 +76,18 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &path) {
// for exporting many buckets from a single endpoint.
if (bucket.empty()) {
path = "/" + object;
bucket_path = "/" + object.substr(0, object.find('/'));
} else {
path = "/" + bucket + "/" + object;
bucket_path = bucket;
}
} else {
// In virtual-style requests, the host should be determined as
// everything between
// :// up until the last /, but with <bucket> appended to the front.
host = bucket + "." + substring(url, i + 3);
path = "/" + object;
bucket_path = "/";
}

return true;
Expand Down Expand Up @@ -136,6 +139,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload,
}
trim(saKey);
} else {
canonicalQueryString = canonicalizeQueryString();

requiresSignature =
false; // If we don't create a signature, it must not be needed...
return true; // If there was no saKey, we need not generate a signature
Expand Down Expand Up @@ -176,14 +181,8 @@ bool AmazonRequest::createV4Signature(const std::string &payload,
canonicalURI = pathEncode(canonicalURI);

// The canonical query string is the alphabetically sorted list of
// URI-encoded parameter names '=' values, separated by '&'s. That
// wouldn't be hard to do, but we don't need to, since we send
// everything in the POST body, instead.
std::string canonicalQueryString;

// This function doesn't (currently) support query parameters,
// but no current caller attempts to use them.
assert((httpVerb != "GET") || query_parameters.size() == 0);
// URI-encoded parameter names '=' values, separated by '&'s.
canonicalQueryString = canonicalizeQueryString();

// The canonical headers must include the Host header, so add that
// now if we don't have it.
Expand All @@ -209,7 +208,6 @@ bool AmazonRequest::createV4Signature(const std::string &payload,
if (!doSha256(payload, messageDigest, &mdLength)) {
this->errorCode = "E_INTERNAL";
this->errorMessage = "Unable to hash payload.";
// dprintf( D_ALWAYS, "Unable to hash payload, failing.\n" );
return false;
}
std::string payloadHash;
Expand Down Expand Up @@ -396,10 +394,6 @@ bool AmazonRequest::sendV4Request(const std::string &payload,
return false;
}

if (!sendContentSHA) {
// dprintf( D_FULLDEBUG, "Payload is '%s'\n", payload.c_str() );
}

std::string authorizationValue;
if (!createV4Signature(payload, authorizationValue, sendContentSHA)) {
if (this->errorCode.empty()) {
Expand All @@ -417,7 +411,12 @@ bool AmazonRequest::sendV4Request(const std::string &payload,
headers["Authorization"] = authorizationValue;
}

return sendPreparedRequest(protocol, hostUrl, payload);
// This operation is on the bucket itself; alter the URL
auto url = hostUrl;
if (!canonicalQueryString.empty()) {
url += "?" + canonicalQueryString;
}
return sendPreparedRequest(protocol, url, payload);
}

// It's stated in the API documentation that you can upload to any region
Expand Down Expand Up @@ -484,3 +483,85 @@ bool AmazonS3Head::SendRequest() {
}

// ---------------------------------------------------------------------------

// Send a bucket listing request.
//
// `continuationToken`: token from a previous listing; when non-empty it is
//     sent as the `continuation-token` query parameter.
// `max_keys`: value sent as the `max-keys` query parameter.
//
// Returns whatever SendS3Request() returns for an empty payload.
bool AmazonS3List::SendRequest(const std::string &continuationToken,
                               size_t max_keys) {
    httpVerb = "GET";

    // The listing targets the bucket rather than a single object, so the
    // request URL is rebuilt from the bucket-level path.
    hostUrl = protocol + "://" + host + bucketPath;

    // `object` is used as the listing prefix and "/" as the delimiter.
    query_parameters["list-type"] = "2";
    query_parameters["delimiter"] = "/";
    query_parameters["prefix"] = urlquote(object);
    query_parameters["max-keys"] = std::to_string(max_keys);

    const bool resuming = !continuationToken.empty();
    if (resuming) {
        query_parameters["continuation-token"] = urlquote(continuationToken);
    }

    return SendS3Request("");
}

// Parse the XML held in `resultString` as a bucket listing response.
//
// `objInfo`: one entry is appended per <Contents> element that has a
//     non-empty <Key> and a parseable, non-negative <Size>.
// `commonPrefixes`: one entry is appended per <CommonPrefixes> element that
//     has a non-empty <Prefix>.
// `ct`: overwritten with the <NextContinuationToken> text when that element
//     is present; otherwise left untouched.
// `errMsg`: set to a description of the problem when false is returned.
//
// Returns false if the XML cannot be parsed or the root element is missing
// or is not <ListBucketResult>; true otherwise.
bool AmazonS3List::Results(std::vector<S3ObjectInfo> &objInfo,
                           std::vector<std::string> &commonPrefixes,
                           std::string &ct, std::string &errMsg) {
    tinyxml2::XMLDocument doc;
    auto err = doc.Parse(resultString.c_str());
    if (err != tinyxml2::XML_SUCCESS) {
        errMsg = doc.ErrorStr();
        return false;
    }

    // A document that parses cleanly may still contain no element at all
    // (for example only a declaration or a comment); RootElement() is then
    // null and must not be dereferenced.
    auto elem = doc.RootElement();
    if (elem == nullptr || strcmp(elem->Name(), "ListBucketResult")) {
        errMsg = "S3 ListBucket response is not rooted with ListBucketResult "
                 "element";
        return false;
    }

    for (auto child = elem->FirstChildElement(); child != nullptr;
         child = child->NextSiblingElement()) {
        if (!strcmp(child->Name(), "CommonPrefixes")) {
            auto prefix = child->FirstChildElement("Prefix");
            if (prefix != nullptr) {
                auto prefixChar = prefix->GetText();
                if (prefixChar != nullptr) {
                    auto prefixStr = std::string(prefixChar);
                    trim(prefixStr);
                    if (!prefixStr.empty()) {
                        commonPrefixes.emplace_back(prefixStr);
                    }
                }
            }
        } else if (!strcmp(child->Name(), "Contents")) {
            std::string keyStr;
            int64_t size = 0;
            bool goodSize = false;
            auto key = child->FirstChildElement("Key");
            if (key != nullptr) {
                auto keyChar = key->GetText();
                if (keyChar != nullptr) {
                    keyStr = std::string(keyChar);
                    trim(keyStr);
                }
            }
            auto sizeElem = child->FirstChildElement("Size");
            if (sizeElem != nullptr) {
                // Reject negative sizes: S3ObjectInfo::m_size is unsigned, so
                // a negative value would silently wrap to a huge number.
                goodSize =
                    (sizeElem->QueryInt64Text(&size) == tinyxml2::XML_SUCCESS) &&
                    (size >= 0);
            }
            // Entries missing either a key or a usable size are skipped.
            if (goodSize && !keyStr.empty()) {
                S3ObjectInfo obj;
                obj.m_key = keyStr;
                obj.m_size = size;
                objInfo.emplace_back(obj);
            }
        } else if (!strcmp(child->Name(), "NextContinuationToken")) {
            auto ctChar = child->GetText();
            if (ctChar) {
                ct = ctChar;
                trim(ct);
            }
        }
    }
    return true;
}
43 changes: 41 additions & 2 deletions src/S3Commands.hh
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,20 @@

#pragma once

#include "S3AccessInfo.hh"
#include "HTTPCommands.hh"

#include <string>
#include <vector>

class AmazonRequest : public HTTPRequest {
public:
AmazonRequest(const S3AccessInfo &ai, const std::string objectName, XrdSysError &log)
: AmazonRequest(ai.getS3ServiceUrl(), ai.getS3AccessKeyFile(),
ai.getS3SecretKeyFile(), ai.getS3BucketName(),
objectName, ai.getS3UrlStyle(), ai.getS3SignatureVersion(), log)
{}

AmazonRequest(const std::string &s, const std::string &akf,
const std::string &skf, const std::string &b,
const std::string &o, const std::string &style, int sv,
Expand All @@ -37,7 +45,7 @@ class AmazonRequest : public HTTPRequest {
// "https://my-url.com:443", the bucket is "my-bucket", and the object
// is "my-object", then the host will be "my-bucket.my-url.com:443" and
// the canonicalURI will be "/my-object".
if (!parseURL(hostUrl, canonicalURI)) {
if (!parseURL(hostUrl, bucketPath, canonicalURI)) {
errorCode = "E_INVALID_SERVICE_URL";
errorMessage =
"Failed to parse host and canonicalURI from service URL.";
Expand Down Expand Up @@ -67,7 +75,7 @@ class AmazonRequest : public HTTPRequest {
virtual const std::string *getAccessKey() const { return &accessKeyFile; }
virtual const std::string *getSecretKey() const { return &secretKeyFile; }

bool parseURL(const std::string &url, std::string &path);
bool parseURL(const std::string &url, std::string &bucket_path, std::string &path);

virtual bool SendRequest();
virtual bool SendS3Request(const std::string &payload);
Expand All @@ -82,6 +90,8 @@ class AmazonRequest : public HTTPRequest {

std::string host;
std::string canonicalURI;
std::string bucketPath; // Path to use for bucket-level operations (such as listings). May be empty for DNS-style buckets
std::string canonicalQueryString;

std::string bucket;
std::string object;
Expand All @@ -103,6 +113,10 @@ class AmazonS3Upload : public AmazonRequest {
using AmazonRequest::SendRequest;

public:
AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log)
: AmazonRequest(ai, objectName, log)
{}

AmazonS3Upload(const std::string &s, const std::string &akf,
const std::string &skf, const std::string &b,
const std::string &o, const std::string &style,
Expand All @@ -122,6 +136,10 @@ class AmazonS3Download : public AmazonRequest {
using AmazonRequest::SendRequest;

public:
AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log)
: AmazonRequest(ai, objectName, log)
{}

AmazonS3Download(const std::string &s, const std::string &akf,
const std::string &skf, const std::string &b,
const std::string &o, const std::string &style,
Expand All @@ -137,6 +155,10 @@ class AmazonS3Head : public AmazonRequest {
using AmazonRequest::SendRequest;

public:
AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log)
: AmazonRequest(ai, objectName, log)
{}

AmazonS3Head(const std::string &s, const std::string &akf,
const std::string &skf, const std::string &b,
const std::string &o, const std::string &style,
Expand All @@ -147,3 +169,20 @@ class AmazonS3Head : public AmazonRequest {

virtual bool SendRequest();
};

// One object entry from a bucket listing.
struct S3ObjectInfo {
    // Object size as reported by the listing.  Zero-initialized so that a
    // default-constructed entry never carries an indeterminate value.
    size_t m_size{0};
    // Object key as reported by the listing.
    std::string m_key;
};

// Request type for listing a bucket's contents under a prefix.
//
// Usage: call SendRequest() and, once it has succeeded, Results() to decode
// the response into object entries and common prefixes.
class AmazonS3List : public AmazonRequest {
  public:
    // `objectName` is forwarded to AmazonRequest as the object name.
    AmazonS3List(const S3AccessInfo &ai, const std::string &objectName,
                 XrdSysError &log)
        : AmazonRequest(ai, objectName, log) {}

    virtual ~AmazonS3List() = default;

    // Send the listing request.  Pass the continuation token returned by a
    // previous Results() call to resume a listing, or an empty string to
    // start one; `max_keys` is the requested limit on returned entries.
    bool SendRequest(const std::string &continuationToken,
                     size_t max_keys = 1000);

    // Decode the response: fills `objInfo` and `commonPrefixes`, stores any
    // continuation token in `ct`, and describes a failure in `errMsg`.
    bool Results(std::vector<S3ObjectInfo> &objInfo,
                 std::vector<std::string> &commonPrefixes, std::string &ct,
                 std::string &errMsg);
};
Loading

0 comments on commit 2ec6cca

Please sign in to comment.