Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DX-90582: move 3 commits from gerrit #4

Merged
merged 3 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions flight_sql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ if (MSVC)
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
)
elseif(APPLE)
set(ARROW_CMAKE_ARGS
Expand All @@ -90,7 +89,6 @@ elseif(APPLE)
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
)
if (DEFINED CMAKE_TOOLCHAIN_FILE)
list(APPEND ARROW_CMAKE_ARGS -DARROW_DEPENDENCY_SOURCE=VCPKG)
Expand All @@ -109,7 +107,6 @@ else()
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
-DOPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR}
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
)
endif()

Expand All @@ -120,6 +117,7 @@ message("Using Arrow from ${ARROW_GIT_REPOSITORY} on tag ${ARROW_GIT_TAG}")
ExternalProject_Add(ApacheArrow
GIT_REPOSITORY ${ARROW_GIT_REPOSITORY}
GIT_TAG ${ARROW_GIT_TAG}
SOURCE_SUBDIR "cpp"
CMAKE_ARGS ${ARROW_CMAKE_ARGS})

include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install/include)
Expand Down
50 changes: 34 additions & 16 deletions flight_sql/accessors/date_array_accessor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,23 @@ using namespace arrow;
using namespace odbcabstraction;

TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
std::vector<int32_t> values = {7589, 12320, 18980, 19095};
std::vector<int32_t> values = {7589, 12320, 18980, 19095, -1, 0};
std::vector<DATE_STRUCT> expected = {
{1990, 10, 12},
{2003, 9, 25},
{2021, 12, 19},
{2022, 4, 13},
{1969, 12, 31},
{1970, 1, 1},
};

std::shared_ptr<Array> array;
ArrayFromVector<Date32Type, int32_t>(values, &array);

DateArrayFlightSqlAccessor<CDataType_DATE, Date32Array> accessor(
dynamic_cast<NumericArray<Date32Type> *>(array.get()));

std::vector<tagDATE_STRUCT> buffer(values.size());
std::vector<DATE_STRUCT> buffer(values.size());
std::vector<ssize_t> strlen_buffer(values.size());

ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
Expand All @@ -37,27 +45,39 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
tm date{};

int64_t converted_time = values[i] * 86400;
GetTimeForSecondsSinceEpoch(date, converted_time);
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
ASSERT_EQ(date.tm_mday, buffer[i].day);
ASSERT_EQ(expected[i].year, buffer[i].year);
ASSERT_EQ(expected[i].month, buffer[i].month);
ASSERT_EQ(expected[i].day, buffer[i].day);
}
}

TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110,
345600000, 432000000, 518400000};
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110, 0,
345600000, 432000000, 518400000, -86400000, -17987443200000};
std::vector<DATE_STRUCT> expected = {
/* year(16), month(u16), day(u16) */
{1970, 1, 2},
{1970, 1, 3},
{1970, 1, 4},
{2022, 4, 12},
{1970, 1, 1},
{1970, 1, 5},
{1970, 1, 6},
{1970, 1, 7},
{1969, 12, 31},
// This is the documented lower limit of supported Gregorian dates for some parts of Boost,
// however boost::posix_time may go lower?
{1400, 1, 1},
};

std::shared_ptr<Array> array;
ArrayFromVector<Date64Type, int64_t>(values, &array);

DateArrayFlightSqlAccessor<CDataType_DATE, Date64Array> accessor(
dynamic_cast<NumericArray<Date64Type> *>(array.get()));

std::vector<tagDATE_STRUCT> buffer(values.size());
std::vector<DATE_STRUCT> buffer(values.size());
std::vector<ssize_t> strlen_buffer(values.size());

ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
Expand All @@ -71,11 +91,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
tm date{};

int64_t converted_time = values[i] / 1000;
GetTimeForSecondsSinceEpoch(date, converted_time);
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
ASSERT_EQ(date.tm_mday, buffer[i].day);
ASSERT_EQ(expected[i].year, buffer[i].year);
ASSERT_EQ(expected[i].month, buffer[i].month);
ASSERT_EQ(expected[i].day, buffer[i].day);
}
}

Expand Down
54 changes: 34 additions & 20 deletions flight_sql/accessors/timestamp_array_accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
#include "timestamp_array_accessor.h"
#include "odbcabstraction/calendar_utils.h"

#include <cmath>
#include <limits>

using namespace arrow;

namespace {
int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
inline int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
int64_t divisor = 1;
switch (unit) {
case TimeUnit::SECOND:
Expand All @@ -32,27 +35,25 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
return divisor;
}

uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) {
uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) {
// Convert the given remainder and time unit to nanoseconds
// since the fraction field on TIMESTAMP_STRUCT is in nanoseconds.
switch (unit) {
case TimeUnit::SECOND:
if (unit == TimeUnit::SECOND)
return 0;
case TimeUnit::MILLI:
// 1000000 nanoseconds = 1 millisecond.
return (units_since_epoch %
driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) *
1000000;
case TimeUnit::MICRO:
// 1000 nanoseconds = 1 microsecond.
return (units_since_epoch %
driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) * 1000;
case TimeUnit::NANO:
// 1000 nanoseconds = 1 microsecond.
return (units_since_epoch %
driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR);
}
return 0;

const int64_t divisor = GetConversionToSecondsDivisor(unit);
const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO);

if (units_since_epoch < 0)
if (units_since_epoch <= (std::numeric_limits<decltype(units_since_epoch)>::min() + divisor))
// Prevent trying to derive and add a value larger than INT64_MAX (i.e. the time value at the start of
// the second which is used to shift the value positive before the modulo operation)) in next statement.
units_since_epoch += divisor;
// See below regarding floor division; here we want ceiling division.
// FIXME this goes poorly (trying to use a value > INT64_MAX when units_since_epoch is
// less than the smallest multiple of divisor greater than INT64_MIN.
units_since_epoch += divisor * std::abs((units_since_epoch - (divisor - 1)) / divisor);
return (units_since_epoch % divisor) * (nano_divisor / divisor);
}
} // namespace

Expand All @@ -72,11 +73,24 @@ TimestampArrayFlightSqlAccessor<TARGET_TYPE, UNIT>::MoveSingleCell_impl(
ColumnBinding *binding, int64_t arrow_row, int64_t cell_counter,
int64_t &value_offset, bool update_value_offset,
odbcabstraction::Diagnostics &diagnostics) {
// Times less than the minimum integer number of seconds that can be represented
// for each time unit will not convert correctly. This is mostly interesting for
// nanoseconds as timestamps in other units are outside of the accepted range of
// Gregorian dates.
auto *buffer = static_cast<TIMESTAMP_STRUCT *>(binding->buffer);

int64_t value = this->GetArray()->Value(arrow_row);
const auto divisor = GetConversionToSecondsDivisor(UNIT);
const auto converted_result_seconds = value / divisor;
const auto converted_result_seconds =
// We want floor division here; C++ will round towards zero
(value < 0)
// Floor division: Shift all "fractional" (not a multiple of divisor) values so they round towards
// zero (and to the same value) along with the "floor" less than them, then add 1 to get back to
// the floor. Althernative we could shift negatively by (divisor - 1) but this breaks near
// INT64_MIN causing underflow..
? ((value + 1) / divisor) - 1
// Towards zero is already floor
: value / divisor;
tm timestamp = {0};

GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds);
Expand Down
141 changes: 86 additions & 55 deletions flight_sql/accessors/timestamp_array_accessor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,27 @@ using namespace arrow;
using namespace odbcabstraction;

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL,
345600000, 432000000, 518400000};

std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL, 345600000,
432000000, 518400000, -86399000, 0, -86399999, -86399001,
86400001, 86400999};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 370000000},
{1970, 1, 3, 0, 0, 0, 0},
{1970, 1, 4, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 110000000},
{1970, 1, 5, 0, 0, 0, 0},
{1970, 1, 6, 0, 0, 0, 0},
{1970, 1, 7, 0, 0, 0, 0},
{1969, 12, 31, 0, 0, 1, 0},
{1970, 1, 1, 0, 0, 0, 0},
/* Tests both ends of the fraction rounding range to ensure we don't tip the wrong way */
{1969, 12, 31, 0, 0, 0, 1000000},
{1969, 12, 31, 0, 0, 0, 999000000},
{1970, 1, 2, 0, 0, 0, 1000000},
{1970, 1, 2, 0, 0, 0, 999000000},
};

std::shared_ptr<Array> timestamp_array;

auto timestamp_field = field("timestamp_field", timestamp(TimeUnit::MILLI));
Expand All @@ -40,26 +58,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);

tm date{};

auto converted_time = values[i] / MILLI_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);

constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000;
ASSERT_EQ(buffer[i].fraction, (values[i] % MILLI_TO_SECONDS_DIVISOR) * NANOSECONDS_PER_MILLI);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {
std::vector<int64_t> values = {86400, 172800, 259200, 1649793238,
345600, 432000, 518400};
345600, 432000, 518400, -86399, 0};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 0},
{1970, 1, 3, 0, 0, 0, 0},
{1970, 1, 4, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 0},
{1970, 1, 5, 0, 0, 0, 0},
{1970, 1, 6, 0, 0, 0, 0},
{1970, 1, 7, 0, 0, 0, 0},
{1969, 12, 31, 0, 0, 1, 0},
{1970, 1, 1, 0, 0, 0, 0},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -81,23 +104,27 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
tm date{};

auto converted_time = values[i];
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, 0);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
std::vector<int64_t> values = {86400000000, 1649793238000000};
std::vector<int64_t> values = {0, 86400000000, 1649793238000000, -86399999999, -86399000001};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 1, 0, 0, 0, 0},
{1970, 1, 2, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 0},
{1969, 12, 31, 0, 0, 0, 1000},
{1969, 12, 31, 0, 0, 0, 999999000},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -120,24 +147,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);

tm date{};

auto converted_time = values[i] / MICRO_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
constexpr uint32_t MICROS_PER_NANO = 1000;
ASSERT_EQ(buffer[i].fraction, (values[i] % MICRO_TO_SECONDS_DIVISOR) * MICROS_PER_NANO);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {
std::vector<int64_t> values = {86400000010000, 1649793238000000000};
std::vector<int64_t> values = {86400000010000, 1649793238000000000, -86399999999999, -86399000000001,
86400000000001, 86400999999999, 0, -9223372036000000001};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 10000},
{2022, 4, 12, 19, 53, 58, 0},
{1969, 12, 31, 0, 0, 0, 1},
{1969, 12, 31, 0, 0, 0, 999999999},
{1970, 1, 2, 0, 0, 0, 1},
{1970, 1, 2, 0, 0, 0, 999999999},
{1970, 1, 1, 0, 0, 0, 0},
/* Test within range where floor (seconds) value is below INT64_MIN in nanoseconds */
{1677, 9, 21, 0, 12, 43, 999999999},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -159,19 +193,16 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
tm date{};

auto converted_time = values[i] / NANO_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
ASSERT_EQ(buffer[i].fraction, (values[i] % NANO_TO_SECONDS_DIVISOR));

ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

} // namespace flight_sql
} // namespace driver
Loading