Skip to content

Commit

Permalink
Merge pull request #1827 from Expensify/main
Browse files Browse the repository at this point in the history
Update expensify_prod branch
  • Loading branch information
iwiznia authored Jul 29, 2024
2 parents 6b2a4f3 + aa77afd commit 69ecfbf
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bedrock.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
wget -qO - https://package.perforce.com/perforce.pubkey --no-check-certificate | sudo apt-key add -
echo "deb [arch=amd64] https://travis:${{ secrets.TRAVIS_APT_PASSWORD }}@$APT_MIRROR_URL/mirror/ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main" | sudo tee -a /etc/apt/sources.list
sudo apt-get update -y
sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install rsyslog cmake gcc-13 g++-13 libsodium-dev libgpgme11-dev libstdc++-13-dev
sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install rsyslog cmake gcc-13 g++-13 libpcre2-dev libsodium-dev libgpgme11-dev libstdc++-13-dev
sudo locale-gen "en_US.UTF-8"
sudo service rsyslog start
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ INTERMEDIATEDIR = .build

# We use the same library paths and required libraries for all binaries.
LIBPATHS =-L$(PROJECT) -Lmbedtls/library
LIBRARIES =-Wl,--start-group -lbedrock -lstuff -Wl,--end-group -ldl -lpcrecpp -lpthread -lmbedtls -lmbedx509 -lmbedcrypto -lz -lm
LIBRARIES =-Wl,--start-group -lbedrock -lstuff -Wl,--end-group -ldl -lpcrecpp -lpcre2-8 -lpthread -lmbedtls -lmbedx509 -lmbedcrypto -lz -lm

# These targets aren't actual files.
.PHONY: all test clustertest clean testplugin
Expand Down
109 changes: 87 additions & 22 deletions libstuff/libstuff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@
#endif
#endif

#include <pcrecpp.h> // sudo apt-get install libpcre++-dev
// Setting this default allows us to not have to worry about calling the specific unit size methods, see https://www.pcre.org/current/doc/html/pcre2.html
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h> // sudo apt-get install libpcre2-dev

// Common error definitions
#define S_errno errno
Expand Down Expand Up @@ -511,7 +513,7 @@ string SUnescape(const char* lhs, char escaper) {
else if (utfValue <= 0x07ff) {
// UTF-8 2 byte header is 110.
// 1100.0000 | Top 5 bits of utfValue
char byte = 0xc0 | (utfValue >> 6);
char byte = (char)(0xc0 | (utfValue >> 6));
working += byte;

// Cancel out the bits we just used.
Expand All @@ -524,7 +526,7 @@ string SUnescape(const char* lhs, char escaper) {
else if (utfValue <= 0xffff) {
// UTF-8 3 byte header is 1110.
// 1110.0000 | Top 4 bits of utfValue.
char byte = 0xe0 | (utfValue >> 12);
char byte = (char)(0xe0 | (utfValue >> 12));
working += byte;

// Cancel out the bits we just used.
Expand Down Expand Up @@ -1703,7 +1705,7 @@ string SGUnzip (const string& content) {
return "";
}

strm.avail_in = content.size();
strm.avail_in = (decltype(strm.avail_in))content.size();
strm.next_in = (unsigned char*)content.c_str();

do {
Expand Down Expand Up @@ -2004,14 +2006,12 @@ bool S_recvappend(int s, SFastBuffer& recvBuffer) {

// Keep trying to receive as long as we can
char buffer[4096];
int totalRecv = 0;
ssize_t numRecv = 0;
sockaddr_in fromAddr;
socklen_t fromAddrLen = sizeof(fromAddr);
while ((numRecv = recvfrom(s, buffer, sizeof(buffer), 0, (sockaddr*)&fromAddr, &fromAddrLen)) > 0) {
// Got some more data
recvBuffer.append(buffer, numRecv);
totalRecv += numRecv;

// If this is a blocking socket, don't try again, once is enough
if (blocking) {
Expand Down Expand Up @@ -2108,7 +2108,7 @@ int S_poll(fd_map& fdm, uint64_t timeout) {

// Timeout is specified in microseconds, but poll uses milliseconds, so we divide by 1000.
int timeoutVal = int(timeout / 1000);
int returnValue = poll(&pollvec[0], fdm.size(), timeoutVal);
int returnValue = poll(&pollvec[0], (nfds_t)fdm.size(), timeoutVal);

// And write our returned events back to our original structure.
for (pollfd pfd : pollvec) {
Expand Down Expand Up @@ -2283,7 +2283,7 @@ bool SFileSave(const string& path, const string& buffer) {
// --------------------------------------------------------------------------
bool SFileCopy(const string& fromPath, const string& toPath) {
// Figure out the size of the file we're copying.
uint64_t fromSize = SFileSize(fromPath);
size_t fromSize = SFileSize(fromPath);
if (!fromSize) {
SWARN("File " << fromPath << " is empty! Copying anyway.");
}
Expand All @@ -2307,8 +2307,8 @@ bool SFileCopy(const string& fromPath, const string& toPath) {
// Read and write
char buf[1024 * 64];
size_t numRead = 0;
uint64_t completeBytes = 0;
int completePercent = 0;
size_t completeBytes = 0;
size_t completePercent = 0;
bool readAny = false;
bool writtenAny = false;
while ((numRead = fread(buf, 1, sizeof(buf), from)) > 0) {
Expand All @@ -2325,7 +2325,7 @@ bool SFileCopy(const string& fromPath, const string& toPath) {
SINFO("Wrote first " << numRead << " bytes to " << toPath << ".");
}
completeBytes += numRead;
int percent = fromSize ? ((completeBytes * 100) / fromSize) : 0;
size_t percent = fromSize ? ((completeBytes * 100) / fromSize) : 0;
if (percent > completePercent) {
SINFO("Copying " << fromPath << " to " << toPath << " is " << percent << "% complete.");
completePercent = percent;
Expand Down Expand Up @@ -2398,7 +2398,7 @@ string SHashSHA256(const string& buffer) {

// --------------------------------------------------------------------------

string SEncodeBase64(const unsigned char* buffer, int size) {
string SEncodeBase64(const unsigned char* buffer, size_t size) {
// First, get the required buffer size
size_t olen = 0;
mbedtls_base64_encode(0, 0, &olen, buffer, size);
Expand All @@ -2415,7 +2415,7 @@ string SEncodeBase64(const string& bufferString) {
}

// --------------------------------------------------------------------------
string SDecodeBase64(const unsigned char* buffer, int size) {
string SDecodeBase64(const unsigned char* buffer, size_t size) {
// First, get the required buffer size
size_t olen = 0;
mbedtls_base64_decode(0, 0, &olen, buffer, size);
Expand Down Expand Up @@ -2576,7 +2576,7 @@ int SQuery(sqlite3* db, const char* e, const string& sql, SQResult& result, int6
// Calling strlen() or any function that iterates across the whole string here is a giant performance problem, and all the operations chosen here have
// been picked specifically to avoid that.
size_t maxLength = sql.size() - (statementRemainder - sql.c_str()) + 1;
error = sqlite3_prepare_v2(db, statementRemainder, maxLength, &preparedStatement, &statementRemainder);
error = sqlite3_prepare_v2(db, statementRemainder, (int)maxLength, &preparedStatement, &statementRemainder);
if (isSyncThread) {
prepareTimeUS += STimeNow() - beforePrepare;
}
Expand Down Expand Up @@ -2805,25 +2805,90 @@ bool SIsValidSQLiteDateModifier(const string& modifier) {
return true;
}

bool SREMatch(const string& regExp, const string& s) {
return pcrecpp::RE(regExp, pcrecpp::RE_Options().set_match_limit_recursion(1000)).FullMatch(s);
}
bool SREMatch(const string& regExp, const string& input, bool caseSensitive, bool partialMatch, vector<string>* matches) {
int errornumber = 0;
PCRE2_SIZE erroroffset = 0;
uint32_t matchFlags = 0;

// These require full-string matches as that's the historical way this function works.
uint32_t compileFlags = partialMatch ? 0 : PCRE2_ANCHORED | PCRE2_ENDANCHORED;
if (!caseSensitive) {
compileFlags |= PCRE2_CASELESS;
}
pcre2_code* re = pcre2_compile((PCRE2_SPTR8)regExp.c_str(), PCRE2_ZERO_TERMINATED, compileFlags, &errornumber, &erroroffset, 0);
if (!re) {
STHROW("Bad regex: " + regExp);
}

pcre2_match_context* matchContext = pcre2_match_context_create(0);
pcre2_set_depth_limit(matchContext, 1000);
pcre2_match_data* matchData = pcre2_match_data_create_from_pattern(re, 0);

int result = pcre2_match(re, (PCRE2_SPTR8)input.c_str(), input.size(), 0, matchFlags, matchData, matchContext);

// If the caller wanted to receive matches, figure them out.
if (matches) {
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(matchData);
int count = pcre2_get_ovector_count(matchData);
for (int i = 0; i < count; ++i) {
PCRE2_SIZE start = ovector[2 * i];
PCRE2_SIZE end = ovector[2 * i + 1];
if (start == PCRE2_UNSET || end == PCRE2_UNSET) {
continue;
}
matches->push_back(input.substr(start, end - start));
}
}

pcre2_code_free(re);
pcre2_match_context_free(matchContext);
pcre2_match_data_free(matchData);

return result > 0;
}

string SREReplace(const string& regExp, const string& input, const string& replacement, bool caseSensitive) {
char* output = nullptr;
size_t outSize = 0;
int errornumber = 0;
PCRE2_SIZE erroroffset = 0;
uint32_t compileFlags = caseSensitive ? 0 : PCRE2_CASELESS;
uint32_t substituteFlags = PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
pcre2_code* re = pcre2_compile((PCRE2_SPTR8)regExp.c_str(), PCRE2_ZERO_TERMINATED, compileFlags, &errornumber, &erroroffset, 0);
if (!re) {
STHROW("Bad regex: " + regExp);
}
pcre2_match_context* matchContext = pcre2_match_context_create(0);
pcre2_set_depth_limit(matchContext, 1000);
for (int i = 0; i < 2; i++) {
int result = pcre2_substitute(re, (PCRE2_SPTR8)input.c_str(), input.size(), 0, substituteFlags, 0, matchContext, (PCRE2_SPTR8)replacement.c_str(), replacement.size(), (PCRE2_UCHAR*)output, &outSize);
if (i == 0 && result == PCRE2_ERROR_NOMEMORY) {
// This is the expected case on the first run, there's not enough space to store the result, so we allocate the space and do it again.
output = (char*)malloc(outSize);
} else if (result) {
SWARN("Regex replacement failed with result " << result << ", returning nothing.");
break;
}
}
string outputString(output);
pcre2_code_free(re);
pcre2_match_context_free(matchContext);
free(output);

bool SREMatch(const string& regExp, const string& s, string& match) {
return pcrecpp::RE(regExp, pcrecpp::RE_Options().set_match_limit_recursion(1000)).FullMatch(s, &match);
return outputString;
}

void SRedactSensitiveValues(string& s) {
// This code removing authTokens is a quick fix and should be removed once https://github.com/Expensify/Expensify/issues/144185 is done.
// The message may be truncated midway through the authToken, so there may not be a closing quote (") at the end of
// the authToken, so we need to optionally match the closing quote with a question mark (?).
pcrecpp::RE("\"authToken\":\".*\"?").GlobalReplace("\"authToken\":<REDACTED>", &s);
s = SREReplace("\"authToken\":\".*\"?", s, "\"authToken\":<REDACTED>");

// Redact queries that contain encrypted fields since there's no value in logging them.
pcrecpp::RE("v[0-9]+:[0-9A-F]{10,}").GlobalReplace("<REDACTED>", &s);
s = SREReplace("v[0-9]+:[0-9A-F]{10,}", s, "<REDACTED>");

// Remove anything inside "html" because we intentionally don't log chats.
pcrecpp::RE("\"html\":\".*\"").GlobalReplace("\"html\":\"<REDACTED>\"", &s);
s = SREReplace("\"html\":\".*\"", s, "\"html\":\"<REDACTED>\"");
}

SStopwatch::SStopwatch() {
Expand Down
16 changes: 8 additions & 8 deletions libstuff/libstuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,19 +303,19 @@ namespace std {
bool is_lock_free() const {
return false;
}
void store(string desired, std::memory_order order = std::memory_order_seq_cst) {
void store(string desired, [[maybe_unused]] std::memory_order order = std::memory_order_seq_cst) {
lock_guard<decltype(m)> l(m);
_string = desired;
};
string load(std::memory_order order = std::memory_order_seq_cst) const {
string load([[maybe_unused]] std::memory_order order = std::memory_order_seq_cst) const {
lock_guard<decltype(m)> l(m);
return _string;
}
operator string() const {
lock_guard<decltype(m)> l(m);
return _string;
}
string exchange(string desired, std::memory_order order = std::memory_order_seq_cst) {
string exchange(string desired, [[maybe_unused]] std::memory_order order = std::memory_order_seq_cst) {
lock_guard<decltype(m)> l(m);
string existing = _string;
_string = desired;
Expand Down Expand Up @@ -387,9 +387,9 @@ bool SEndsWith(const string& haystack, const string& needle);
bool SConstantTimeEquals(const string& secret, const string& userInput);
bool SConstantTimeIEquals(const string& secret, const string& userInput);

// Perform a full regex match. The '^' and '$' symbols are implicit.
bool SREMatch(const string& regExp, const string& s);
bool SREMatch(const string& regExp, const string& s, string& match);
// Unless `partialMatch` is specified, perform a full regex match (the '^' and '$' symbols are implicit).
bool SREMatch(const string& regExp, const string& input, bool caseSensitive = true, bool partialMatch = false, vector<string>* matches = nullptr);
string SREReplace(const string& regExp, const string& input, const string& replacement, bool caseSensitive = true);

// Redact values that should not be logged.
void SRedactSensitiveValues(string& s);
Expand Down Expand Up @@ -549,9 +549,9 @@ string SHashSHA1(const string& buffer);
string SHashSHA256(const string& buffer);

// Various encoding/decoding functions
string SEncodeBase64(const unsigned char* buffer, const int size);
string SEncodeBase64(const unsigned char* buffer, const size_t size);
string SEncodeBase64(const string& buffer);
string SDecodeBase64(const unsigned char* buffer, const int size);
string SDecodeBase64(const unsigned char* buffer, const size_t size);
string SDecodeBase64(const string& buffer);

// HMAC (for use with Amazon S3)
Expand Down
9 changes: 5 additions & 4 deletions plugins/MySQL.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#include "MySQL.h"

#include <pcrecpp.h>

#include <bedrockVersion.h>
#include <libstuff/SQResult.h>

#include <cstring>

#undef SLOGPREFIX
#define SLOGPREFIX "{" << getName() << "} "

Expand Down Expand Up @@ -269,9 +269,10 @@ void BedrockPlugin_MySQL::onPortRecv(STCPManager::Socket* s, SData& request) {
SINFO("Processing query '" << query << "'");

// See if it's asking for a global variable
string varName;
string regExp = "^(?:(?:SELECT\\s+)?@@(?:\\w+\\.)?|SHOW VARIABLES LIKE ')(\\w+).*$";
if (pcrecpp::RE(regExp, pcrecpp::RE_Options().set_caseless(true)).FullMatch(query, &varName)) {
vector<string> matches;
if (SREMatch(regExp, query, false, false, &matches)) {
string varName = matches[0];
// Loop across and look for it
SQResult result;
result.headers.push_back(varName);
Expand Down
14 changes: 7 additions & 7 deletions test/clustertest/testplugin/TestPlugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ class TestPluginCommand : public BedrockCommand {
public:
TestPluginCommand(SQLiteCommand&& baseCommand, BedrockPlugin_TestPlugin* plugin);
~TestPluginCommand();
virtual void prePeek(SQLite& db);
virtual bool peek(SQLite& db);
virtual void process(SQLite& db);
virtual void postProcess(SQLite& db);
virtual bool shouldPrePeek();
virtual bool shouldPostProcess();
virtual void prePeek(SQLite& db) override;
virtual bool peek(SQLite& db) override;
virtual void process(SQLite& db) override;
virtual void postProcess(SQLite& db) override;
virtual bool shouldPrePeek() override;
virtual bool shouldPostProcess() override;
virtual void reset(BedrockCommand::STAGE stage) override;
bool shouldEnableOnPrepareNotification(const SQLite& db, void (**handler)(SQLite& _db, int64_t tableID));
bool shouldEnableOnPrepareNotification(const SQLite& db, void (**handler)(SQLite& _db, int64_t tableID)) override;

private:
BedrockPlugin_TestPlugin& plugin() { return static_cast<BedrockPlugin_TestPlugin&>(*_plugin); }
Expand Down
52 changes: 52 additions & 0 deletions test/tests/LibStuffTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ struct LibStuff : tpunit::TestFixture {
TEST(LibStuff::testBase32Conversion),
TEST(LibStuff::testContains),
TEST(LibStuff::testFirstOfMonth),
TEST(LibStuff::SREMatchTest),
TEST(LibStuff::SREReplaceTest),
TEST(LibStuff::SQResultTest)
)
{ }
Expand Down Expand Up @@ -638,6 +640,56 @@ struct LibStuff : tpunit::TestFixture {
ASSERT_EQUAL(SFirstOfMonth(timeStamp4, -25), "2018-06-01");
}

void SREMatchTest() {
// Basic case.
ASSERT_TRUE(SREMatch(".*cat.*", "this contains cat"));
ASSERT_FALSE(SREMatch(".*cat.*", "this does not"));

// Case sensitive but case doesn't match.
ASSERT_FALSE(SREMatch(".*CAT.*", "this contains cat"));

// Case-insensitive.
ASSERT_TRUE(SREMatch(".*CAT.*", "this contains cat", false));
ASSERT_FALSE(SREMatch(".*CAT.*", "this does not", false));

// Capture groups don't break internal code.
ASSERT_TRUE(SREMatch(".*cat.*", "(this) (contains) (cat)"));
ASSERT_FALSE(SREMatch(".*cat.*", "(this) (does) (not)"));

// Partial matches aren't counted.
ASSERT_FALSE(SREMatch("cat", "this contains cat"));

// Now try with partial specified, should work.
ASSERT_TRUE(SREMatch("cat", "this contains cat", true, true));

// Test returning matches.
vector<string> matches;
SREMatch(R"((\w+) (\w+) (\w+))", "this contains cat", false, false, &matches);

// The whole string, and the three groups.
ASSERT_EQUAL(matches.size(), 4);
ASSERT_EQUAL(matches[0], "this contains cat");
ASSERT_EQUAL(matches[1], "this");
ASSERT_EQUAL(matches[2], "contains");
ASSERT_EQUAL(matches[3], "cat");
}

void SREReplaceTest() {
// This specifically tests multiple replacements and that the final string is longer than the starting string.
string from = "a cat is not a dog it is a cat";
string expected = "a dinosaur is not a dog it is a dinosaur";
string result = SREReplace("cat", from, "dinosaur");
ASSERT_EQUAL(result, expected);

// And test case sensitivity (disabled)
string result2 = SREReplace("CAT", from, "dinosaur");
ASSERT_EQUAL(result2, from);

// And test case sensitivity (enabled)
string result3 = SREReplace("CAT", from, "dinosaur", false);
ASSERT_EQUAL(result3, expected);
}

void SQResultTest() {
SQLite db(":memory:", 1000, 1000, 1);
db.beginTransaction(SQLite::TRANSACTION_TYPE::EXCLUSIVE);
Expand Down

0 comments on commit 69ecfbf

Please sign in to comment.