Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(udf): support datediff dates before 1900 #3499

Merged
merged 1 commit into from
Oct 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions hybridse/src/codegen/udf_ir_builder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1078,21 +1078,30 @@ TEST_F(UdfIRBuilderTest, DateDiff) {
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 44924, "2022-12-31", "1900-01-01");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 50, "20220620",
"2022-05-01 11:11:11");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 0, "2022-05-01", "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 0,
"2022-05-01", "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2022-02-29", "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "1899-05-20",
"2020-05-20");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 9, "1899-05-20", "1899-05-11");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2022-05-40",
"2020-05-20");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2020-05-20",
"1899-05-20");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, -30, "1199-10-12", "1199-11-11");
// RFC 3339 full format
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(
func_name, 20, "2000-01-01t00:12:00.1+08:00", "1999-12-12T12:12:12+08:00");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(
func_name, 19, "2000-01-01t00:12:00.1+08:00", "1999-12-12T20:12:12Z");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(
func_name, 20, "2000-01-01t06:12:00.1+08:00", "1999-12-12T12:12:12Z");

CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, nullptr, "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2022-05-01", nullptr);
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, nullptr, nullptr);

// mix types
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<Date>>(func_name, -19, "2022-05-01", Date(2022, 5, 20));
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, 19, Date(2022, 5, 20), "2022-05-01");
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, 3, Date(1900, 1, 1), "1899-12-29");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<Date>>(func_name, -3, "1899-12-29", Date(1900, 1, 1));
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, nullptr, nullptr, "2022-05-01");
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, nullptr, Date(2022, 5, 20), nullptr);
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, nullptr, nullptr, nullptr);
Expand All @@ -1101,6 +1110,29 @@ TEST_F(UdfIRBuilderTest, DateDiff) {
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<Date>>(func_name, nullptr, nullptr, nullptr);
}

// datediff must yield NULL whenever one of its string arguments is out of range or malformed.
TEST_F(UdfIRBuilderTest, DateDiffNull) {
    const auto udf_name = "datediff";
    // A well-formed timestamp used as the "other" operand in every case below.
    static constexpr char kValidTime[] = "1999-12-12T12:12:12Z";

    // Calls datediff(lhs, rhs) with two string arguments and expects a NULL result.
    auto expect_null = [&](Nullable<StringRef> lhs, Nullable<StringRef> rhs) {
        CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(udf_name, nullptr, lhs, rhs);
    };

    // out-of-range format: day 00, month 13, day 32, hour 25, minute 66, second 61
    expect_null("1900-01-00", kValidTime);
    expect_null("1977-13-01", kValidTime);
    expect_null("19771232", kValidTime);
    expect_null("1999-12-12T25:12:12Z", kValidTime);
    expect_null("1999-12-12T12:66:12Z", kValidTime);
    expect_null("1999-12-12T12:00:61Z", kValidTime);

    // invalid format: embedded spaces, and a time of day without any date part
    expect_null(kValidTime, "202 2-12-2 9");
    expect_null(kValidTime, "12:30:30");
}

class UdfIRCastTest : public ::testing::TestWithParam<std::pair<absl::string_view, Nullable<int64_t>>> {};

Expand Down
28 changes: 12 additions & 16 deletions hybridse/src/udf/default_udf_library.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2591,16 +2591,21 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() {
});

RegisterExternal("datediff")
.args<Date, Date>(reinterpret_cast<void*>(static_cast<void (*)(Date*, Date*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>()
.args<Date, Date>(static_cast<void (*)(Date*, Date*, int32_t*, bool*)>(v1::date_diff))
.doc(R"(
@brief days difference from date1 to date2
Supported date string style:
- yyyy-mm-dd
- yyyymmdd
- yyyy-mm-dd hh:mm:ss
- yyyy-mm-dd HH:MM:SS
- yyyy-mm-ddTHH:MM:SS.fff+HH:MM (RFC3339 format)
Dates given as strings are converted into the same time zone (currently always UTC+8) before the difference is taken;
values of date type are treated as UTC+8 by default. You may see a +1/-1 difference if the two date strings carry different time zones.
Hint: since the OpenMLDB date type only supports years from 1900 onward, to datediff from/to a date before
1900, pass it as a string.
Example:
Expand All @@ -2614,20 +2619,11 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() {
@endcode
@since 0.7.0)");
RegisterExternal("datediff")
.args<StringRef, StringRef>(
reinterpret_cast<void*>(static_cast<void (*)(StringRef*, StringRef*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>();
.args<StringRef, StringRef>(static_cast<void (*)(StringRef*, StringRef*, int32_t*, bool*)>(v1::date_diff));
RegisterExternal("datediff")
.args<StringRef, Date>(
reinterpret_cast<void*>(static_cast<void (*)(StringRef*, Date*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>();
.args<StringRef, Date>(static_cast<void (*)(StringRef*, Date*, int32_t*, bool*)>(v1::date_diff));
RegisterExternal("datediff")
.args<Date, StringRef>(
reinterpret_cast<void*>(static_cast<void (*)(Date*, StringRef*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>();
.args<Date, StringRef>(static_cast<void (*)(Date*, StringRef*, int32_t*, bool*)>(v1::date_diff));

RegisterExternal("unix_timestamp")
.args<Date>(reinterpret_cast<void*>(static_cast<void (*)(Date*, int64_t*, bool*)>(v1::date_to_unix_timestamp)))
Expand Down
106 changes: 79 additions & 27 deletions hybridse/src/udf/udf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include "udf/udf.h"

#include <absl/time/time.h>
#include <stdint.h>
#include <time.h>

Expand All @@ -28,6 +27,7 @@
#include "absl/strings/ascii.h"
#include "absl/strings/str_replace.h"
#include "absl/time/civil_time.h"
#include "absl/time/time.h"
#include "base/iterator.h"
#include "boost/date_time.hpp"
#include "boost/date_time/gregorian/parsers.hpp"
Expand All @@ -37,7 +37,7 @@
#include "codec/row.h"
#include "codec/type_codec.h"
#include "codegen/fn_ir_builder.h"
#include "farmhash.h" // NOLINT
#include "farmhash.h"
#include "node/node_manager.h"
#include "node/sql_node.h"
#include "re2/re2.h"
Expand All @@ -57,6 +57,20 @@ using openmldb::base::StringRef;
using openmldb::base::Timestamp;
using openmldb::base::Date;

// Layout strings handed to absl::ParseTime: strftime()-like formatting options with
// absl extensions (see the absl::FormatTime documentation for the full specifier list).
// "yyyy-mm-dd"
static constexpr char DATE_FMT_YMD_1[] = "%E4Y-%m-%d";
// "yyyymmdd"
static constexpr char DATE_FMT_YMD_2[] = "%E4Y%m%d";
// "yyyy-mm-dd HH:MM:SS"
static constexpr char DATE_FMT_YMDHMS[] = "%E4Y-%m-%d %H:%M:%S";
// Full date-time with fractional seconds and a UTC offset, e.g. "2000-01-01t00:12:00.1+08:00".
// NOTE(review): the identifier says "RF3399"; the format meant is presumably RFC 3339.
static constexpr char DATE_FMT_RF3399_FULL[] = "%Y-%m-%d%ET%H:%M:%E*S%Ez";

// TODO(chenjing): make the time zone configurable in one place
// Fixed offset from UTC, in hours.
static constexpr int32_t TZ = 8;
// The same offset as an absl time zone; absl::FixedTimeZone takes the offset in seconds.
static const absl::TimeZone DEFAULT_TZ = absl::FixedTimeZone(TZ * 60 * 60);
// TZ multiplied by 3600000 -- presumably the offset in milliseconds; confirm against its users.
static constexpr time_t TZ_OFFSET = TZ * 3600000;
static constexpr int MAX_ALLOC_SIZE = 2 * 1024 * 1024; // 2 MiB
// bthread key; judging by the name it indexes a thread-local memory pool -- its use is not visible here.
bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY;

void hex(StringRef *str, StringRef *output) {
std::ostringstream ss;
for (uint32_t i=0; i < str->size_; i++) {
Expand Down Expand Up @@ -104,12 +118,6 @@ void unhex(StringRef *str, StringRef *output, bool* is_null) {
}
}

// TODO(chenjing): 时区统一配置
constexpr int32_t TZ = 8;
constexpr time_t TZ_OFFSET = TZ * 3600000;
constexpr int MAX_ALLOC_SIZE = 2 * 1024 * 1024; // 2M
bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY;

void trivial_fun() {}

void dayofyear(int64_t ts, int32_t* out, bool* is_null) {
Expand Down Expand Up @@ -818,7 +826,26 @@ void string_to_date(StringRef *str, Date *output,
return;
}

void date_diff(Date *date1, Date *date2, int *diff, bool *is_null) {
// Parses a date or date-time string into an absolute point in time.
//
// The layout is selected from the input length alone:
//   19 characters -> DATE_FMT_YMDHMS
//   10 characters -> DATE_FMT_YMD_1
//    8 characters -> DATE_FMT_YMD_2
//   anything else -> DATE_FMT_RF3399_FULL
//
// @param ref  text to parse
// @return the parsed time; InvalidArgument when absl::ParseTime rejects the text, or when the
//         text is one of the special "infinite-future" / "infinite-past" literals.
//
// NOTE(review): text without a UTC offset is parsed by the time-zone-less absl::ParseTime
// overload, which reads it as UTC, whereas the date_diff overloads convert the result with
// DEFAULT_TZ -- confirm that this is the intended interpretation.
absl::StatusOr<absl::Time> string_to_time(absl::string_view ref) {
    absl::string_view layout = DATE_FMT_RF3399_FULL;
    switch (ref.size()) {
        case 19:
            layout = DATE_FMT_YMDHMS;
            break;
        case 10:
            layout = DATE_FMT_YMD_1;
            break;
        case 8:
            layout = DATE_FMT_YMD_2;
            break;
        default:
            break;
    }

    absl::Time parsed;
    std::string err;
    if (!absl::ParseTime(layout, ref, &parsed, &err)) {
        return absl::InvalidArgumentError(err);
    }

    // absl::ParseTime accepts "infinite-future" and "infinite-past" whatever the layout is.
    // An infinite time has no meaningful calendar day, so report it as invalid rather than
    // letting callers compute a day difference from it.
    if (parsed == absl::InfiniteFuture() || parsed == absl::InfinitePast()) {
        return absl::InvalidArgumentError("infinite time is not a valid date");
    }
    return parsed;
}

void date_diff(Date *date1, Date *date2, int32_t *diff, bool *is_null) {
if (date1 == nullptr || date2 == nullptr || date1->date_ <= 0 || date2->date_ <= 0) {
*is_null = true;
return;
Expand All @@ -838,36 +865,61 @@ void date_diff(Date *date1, Date *date2, int *diff, bool *is_null) {
*is_null = false;
}

// datediff(string, string): writes the number of days d1 - d2 into *diff, where d1 and d2 are
// the calendar days of date1 and date2; *is_null is set to true when either string fails to parse.
// NOTE(review): date1 and date2 are dereferenced without a nullptr check, unlike the
// (Date, Date) overload -- confirm callers never pass nullptr.
void date_diff(StringRef *date1, StringRef *date2, int *diff, bool *is_null) {
Date d1;
string_to_date(date1, &d1, is_null);
if (*is_null) {
void date_diff(StringRef *date1, StringRef *date2, int32_t *diff, bool *is_null) {
// Parse the first operand; a parse failure makes the whole result NULL.
auto t1 = string_to_time(absl::string_view(date1->data_, date1->size_));
if (!t1.ok()) {
*is_null = true;
return;
}
Date d2;
string_to_date(date2, &d2, is_null);
if (*is_null) {
// Parse the second operand under the same rule.
auto t2 = string_to_time(absl::string_view(date2->data_, date2->size_));
if (!t2.ok()) {
*is_null = true;
return;
}
date_diff(&d1, &d2, diff, is_null);

// Reduce both instants to calendar days in DEFAULT_TZ so that only whole days are compared.
auto d1 = absl::ToCivilDay(t1.value(), DEFAULT_TZ);
auto d2 = absl::ToCivilDay(t2.value(), DEFAULT_TZ);

// Subtracting two civil days yields the signed number of days between them.
*diff = d1 - d2;
*is_null = false;
}

// datediff(string, date): writes the number of days d1 - d2 into *diff; *is_null is set to true
// when date1 fails to parse or date2 cannot be decoded.
// NOTE(review): date1 and date2 are dereferenced without a nullptr check, unlike the
// (Date, Date) overload -- confirm callers never pass nullptr.
void date_diff(StringRef *date1, Date *date2, int *diff, bool *is_null) {
Date d1;
string_to_date(date1, &d1, is_null);
if (*is_null) {
void date_diff(StringRef *date1, Date *date2, int32_t *diff, bool *is_null) {
// Parse the string operand; a parse failure makes the whole result NULL.
auto t1 = string_to_time(absl::string_view(date1->data_, date1->size_));
if (!t1.ok()) {
*is_null = true;
return;
}
date_diff(&d1, date2, diff, is_null);
// Calendar day of the parsed instant in DEFAULT_TZ.
auto d1 = absl::ToCivilDay(t1.value(), DEFAULT_TZ);

// Unpack the encoded Date into year/month/day; a failed decode makes the result NULL.
int32_t year, month, day;
if (!Date::Decode(date2->date_, &year, &month, &day)) {
*is_null = true;
return;
}
// The Date value is used directly as a calendar day, with no time-zone conversion.
auto d2 = absl::CivilDay(year, month, day);

// Subtracting two civil days yields the signed number of days between them.
*diff = d1 - d2;
*is_null = false;
}

// datediff(date, string): writes the number of days d1 - d2 into *diff; *is_null is set to true
// when date2 fails to parse or date1 cannot be decoded.
// NOTE(review): date1 and date2 are dereferenced without a nullptr check, unlike the
// (Date, Date) overload -- confirm callers never pass nullptr.
void date_diff(Date *date1, StringRef *date2, int *diff, bool *is_null) {
Date d2;
string_to_date(date2, &d2, is_null);
if (*is_null) {
void date_diff(Date *date1, StringRef *date2, int32_t *diff, bool *is_null) {
// Parse the string operand; a parse failure makes the whole result NULL.
auto t2 = string_to_time(absl::string_view(date2->data_, date2->size_));
if (!t2.ok()) {
*is_null = true;
return;
}
date_diff(date1, &d2, diff, is_null);
// Calendar day of the parsed instant in DEFAULT_TZ.
auto d2 = absl::ToCivilDay(t2.value(), DEFAULT_TZ);

// Unpack the encoded Date into year/month/day; a failed decode makes the result NULL.
int32_t year, month, day;
if (!Date::Decode(date1->date_, &year, &month, &day)) {
*is_null = true;
return;
}
// The Date value is used directly as a calendar day, with no time-zone conversion.
auto d1 = absl::CivilDay(year, month, day);

// Subtracting two civil days yields the signed number of days between them.
*diff = d1 - d2;
*is_null = false;
}

// cast string to timestamp with yyyy-mm-dd or YYYY-mm-dd HH:MM:SS
Expand Down
10 changes: 6 additions & 4 deletions hybridse/src/udf/udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,10 +367,10 @@ void timestamp_to_date(Timestamp *timestamp, Date *output, bool *is_null);

void date_to_string(Date *date, StringRef *output);

void date_diff(Date *date1, Date *date2, int *diff, bool *is_null);
void date_diff(StringRef *date1, StringRef *date2, int *diff, bool *is_null);
void date_diff(StringRef *date1, Date *date2, int *diff, bool *is_null);
void date_diff(Date *date1, StringRef *date2, int *diff, bool *is_null);
void date_diff(Date *date1, Date *date2, int32_t *diff, bool *is_null);
void date_diff(StringRef *date1, StringRef *date2, int32_t *diff, bool *is_null);
void date_diff(StringRef *date1, Date *date2, int32_t *diff, bool *is_null);
void date_diff(Date *date1, StringRef *date2, int32_t *diff, bool *is_null);

void like(StringRef *name, StringRef *pattern,
StringRef *escape, bool *out, bool *is_null);
Expand All @@ -384,6 +384,8 @@ void regexp_like(StringRef *name, StringRef *pattern, bool *out, bool *is_null);

void date_to_timestamp(Date *date, Timestamp *output, bool *is_null);
void string_to_date(StringRef *str, Date *output, bool *is_null);
absl::StatusOr<absl::Time> string_to_time(absl::string_view str);

void string_to_timestamp(StringRef *str, Timestamp *output, bool *is_null);
void date_to_unix_timestamp(Date *date, int64_t *output, bool *is_null);
void string_to_unix_timestamp(StringRef *str, int64_t *output, bool *is_null);
Expand Down