From 2298a06f7189a9f9f1a287366fc56ea6dc822063 Mon Sep 17 00:00:00 2001 From: Matthew Fidler Date: Sat, 21 Sep 2024 00:35:22 -0500 Subject: [PATCH] Some more changes to the index lookup --- NAMESPACE | 1 + R/RcppExports.R | 71 ++++ src/RcppExports.cpp | 32 ++ src/init.c | 7 + src/poped.cpp | 247 ++++++++++++++ src/timsort.h | 811 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1169 insertions(+) create mode 100644 src/timsort.h diff --git a/NAMESPACE b/NAMESPACE index 7f8f777e..019eef09 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -173,6 +173,7 @@ export(nonmem2rx) export(nonmemControl) export(pkncaControl) export(popedControl) +export(popedMultipleEndpointIndexDataFrame) export(rxModelVars) export(rxToMonolix) export(rxToNonmem) diff --git a/R/RcppExports.R b/R/RcppExports.R index 6941a2fb..415503ff 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -5,6 +5,77 @@ convertDataBack <- function(id, time, amt, ii, evid, cmt, cmtDvid, dvidDvid, lin .Call(`_babelmixr2_convertDataBack`, id, time, amt, ii, evid, cmt, cmtDvid, dvidDvid, linNcmt, linKa, neq, replaceEvid, zeroDose2) } +#' @title Get Multiple Endpoint Modeling Times +#' +#' @description +#' +#' This function takes a vector of times and a corresponding vector +#' of IDs, groups the times by their IDs, initializes an internal +#' C++ global TimeIndexer, that is used to efficiently lookup the +#' final output from the rxode2 solve and then returns the sorted +#' unique times +#' +#' @param times A numeric vector of times. +#' +#' @param modelSwitch An integer vector of model switch indicator +#' corresponding to the times +#' +#' @return A numeric vector of sorted unique times. +#' +#' @examples +#' +#' \donttest{ +#' +#' times <- c(1.1, 1.2, 1.3, 2.1, 2.2, 3.1) +#' modelSwitch <- c(1, 1, 1, 2, 2, 3) +#' sortedTimes <- popedGetMultipleEndpointModelingTimes(times, modelSwitch) +#' print(sortedTimes) +#' +#' # now show the output of the data frame representing the model +#' # switch to endpoint index +#' +#' popedMultipleEndpointIndexDataFrame() +#' +#' # now show a more complex example with overlaps etc. +#' +#' times <- c(1.1, 1.2, 1.3, 0.5, 2.2, 1.1, 0.75,0.75) +#' modelSwitch <- c(1, 1, 1, 2, 2, 2, 3, 3) +#' sortedTimes <- popedGetMultipleEndpointModelingTimes(times, modelSwitch) +#' print(sortedTimes) +#' +#' popedMultipleEndpointIndexDataFrame() +#' +#' } +popedGetMultipleEndpointModelingTimes <- function(times, modelSwitch) { + .Call(`_babelmixr2_popedGetMultipleEndpointModelingTimes`, times, modelSwitch) +} + +#' @title Reset the Global Time Indexer for Multiple Endpoint Modeling +#' +#' @description +#' +#' This clears the memory and resets the global time indexer used for +#' multiple endpoint modeling. +#' +#' @return NULL, called for side effects +#' +#' @examples +#' +#' \donttest{ +#' +#' popedMultipleEndpointResetTimeIndex() +#' +#' } +popedMultipleEndpointResetTimeIndex <- function() { + .Call(`_babelmixr2_popedMultipleEndpointResetTimeIndex`) +} + +#' @rdname popedGetMultipleEndpointModelingTimes +#' @export +popedMultipleEndpointIndexDataFrame <- function() { + .Call(`_babelmixr2_popedMultipleEndpointIndexDataFrame`) +} + popedFree <- function() { .Call(`_babelmixr2_popedFree`) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index cf830bbb..37ab3ab3 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -34,6 +34,38 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// popedGetMultipleEndpointModelingTimes +Rcpp::NumericVector popedGetMultipleEndpointModelingTimes(Rcpp::NumericVector times, Rcpp::IntegerVector modelSwitch); +RcppExport SEXP _babelmixr2_popedGetMultipleEndpointModelingTimes(SEXP timesSEXP, SEXP modelSwitchSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type times(timesSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type modelSwitch(modelSwitchSEXP); + rcpp_result_gen = Rcpp::wrap(popedGetMultipleEndpointModelingTimes(times, modelSwitch)); + return rcpp_result_gen; +END_RCPP +} +// popedMultipleEndpointResetTimeIndex +Rcpp::RObject popedMultipleEndpointResetTimeIndex(); +RcppExport SEXP _babelmixr2_popedMultipleEndpointResetTimeIndex() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(popedMultipleEndpointResetTimeIndex()); + return rcpp_result_gen; +END_RCPP +} +// popedMultipleEndpointIndexDataFrame +Rcpp::List popedMultipleEndpointIndexDataFrame(); +RcppExport SEXP _babelmixr2_popedMultipleEndpointIndexDataFrame() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(popedMultipleEndpointIndexDataFrame()); + return rcpp_result_gen; +END_RCPP +} // popedFree RObject popedFree(); RcppExport SEXP _babelmixr2_popedFree() { diff --git a/src/init.c b/src/init.c index c6bb660a..7e7e4689 100644 --- a/src/init.c +++ b/src/init.c @@ -18,8 +18,15 @@ SEXP _babelmixr2_popedSolveIdN(SEXP thetaSEXP, SEXP mtSEXP, SEXP idSEXP, SEXP to SEXP _babelmixr2_popedSolveIdME(SEXP thetaSEXP, SEXP umtSEXP, SEXP mtSEXP, SEXP msSEXP, SEXP nendSEXP, SEXP idSEXP, SEXP totnSEXP); SEXP _babelmixr2_popedSolveIdME2(SEXP thetaSEXP, SEXP umtSEXP, SEXP mtSEXP, SEXP msSEXP, SEXP nendSEXP, SEXP idSEXP, SEXP totnSEXP); SEXP _babelmixr2_popedSolveIdN2(SEXP thetaSEXP, SEXP mtSEXP, SEXP idSEXP, SEXP totnSEXP); +SEXP _babelmixr2_popedGetMultipleEndpointModelingTimes(SEXP, SEXP); +SEXP _babelmixr2_popedMultipleEndpointResetTimeIndex(void); +SEXP _babelmixr2_popedMultipleEndpointIndexDataFrame(void); static const R_CallMethodDef CallEntries[] = { + {"_babelmixr2_popedMultipleEndpointIndexDataFrame", (DL_FUNC) &_babelmixr2_popedMultipleEndpointIndexDataFrame, 0}, + {"_babelmixr2_popedMultipleEndpointResetTimeIndex", (DL_FUNC) &_babelmixr2_popedMultipleEndpointResetTimeIndex, 0}, + {"_babelmixr2_popedGetMultipleEndpointModelingTimes", + (DL_FUNC) &_babelmixr2_popedGetMultipleEndpointModelingTimes, 2}, {"_babelmixr2_popedFree", (DL_FUNC) &_babelmixr2_popedFree, 0}, {"_babelmixr2_popedSetup", (DL_FUNC) &_babelmixr2_popedSetup, 2}, {"_babelmixr2_popedSolveIdN", (DL_FUNC) &_babelmixr2_popedSolveIdN, 4}, diff --git a/src/poped.cpp b/src/poped.cpp index 4b28b241..d3de1627 100644 --- a/src/poped.cpp +++ b/src/poped.cpp @@ -12,6 +12,253 @@ #include #include +#include "timsort.h" +#include +#include +#include +#include +#include + +struct timeInfo { + int id; + int firstIndex; + int count; +}; + + +class timeIndexer { +public: + + timeIndexer() : initialized(false), sorted(false), nIds(0) {} + + timeIndexer(const std::vector& ids, + const std::vector>& times) { + initialize(ids, times); + } + + void initialize(const std::vector& ids, const std::vector>& times) { + reset(); + nIds = ids.size(); + size_t k = 0; + std::unordered_map> timeIdCount; + std::unordered_map> timeIdFirstIndex; + + for (size_t i = ids.size(); i--;) { + int id = ids[i]; + for (size_t j = 0; j < times[i].size(); ++j) { + double time = times[i][j]; + if (timeIdCount[time].find(id) == timeIdCount[time].end()) { + timeIdCount[time][id] = 0; + timeIdFirstIndex[time][id] = k; + } + timeIdCount[time][id]++; + uniqueTimes.insert(time); + k++; + } + } + + for (const auto& timeEntry : timeIdCount) { + double time = timeEntry.first; + for (const auto& idEntry : timeEntry.second) { + int id = idEntry.first; + int count = idEntry.second; + int firstIndex = timeIdFirstIndex[time][id]; + timeToInfo[time].emplace_back(timeInfo{id, firstIndex, count}); + } + } + + initialized = true; + sorted = false; + } + + void reset() { + timeToInfo.clear(); + uniqueTimes.clear(); + sortedTimes.clear(); + initialized = false; + sorted = false; + nIds=0; + } + + size_t getNid() { + return nIds; + } + + bool isInitialized() const { + return initialized; + } + + std::vector getSortedUniqueTimes() { + if (!initialized) { + throw std::runtime_error("timeIndexer has not been initialized"); + } + if (!sorted) { + sortedTimes.assign(uniqueTimes.begin(), uniqueTimes.end()); + gfx::timsort(sortedTimes.begin(), sortedTimes.end()); + sorted = true; + } + return sortedTimes; + } + + const std::vector& getTimeInfo(double time) const { + if (!initialized) { + throw std::runtime_error("timeIndexer has not been initialized"); + } + return timeToInfo.at(time); + } + +private: + std::unordered_map> timeToInfo; + std::set uniqueTimes; + std::vector sortedTimes; + bool initialized; + bool sorted; + size_t nIds; +}; + + +timeIndexer globalTimeIndexer; + +void convertToTimeIndexerStructure(const std::vector& times, + const std::vector& ids, + std::vector& outIds, + std::vector>& outTimes) { + + std::unordered_map> idToTimesMap; + + for (size_t i = 0; i < times.size(); ++i) { + idToTimesMap[ids[i]].push_back(times[i]); + } + + for (const auto& pair : idToTimesMap) { + outIds.push_back(pair.first); + outTimes.push_back(pair.second); + } +} + + +//' @title Get Multiple Endpoint Modeling Times +//' +//' @description +//' +//' This function takes a vector of times and a corresponding vector +//' of IDs, groups the times by their IDs, initializes an internal +//' C++ global TimeIndexer, that is used to efficiently lookup the +//' final output from the rxode2 solve and then returns the sorted +//' unique times +//' +//' @param times A numeric vector of times. +//' +//' @param modelSwitch An integer vector of model switch indicator +//' corresponding to the times +//' +//' @return A numeric vector of sorted unique times. +//' +//' @examples +//' +//' \donttest{ +//' +//' times <- c(1.1, 1.2, 1.3, 2.1, 2.2, 3.1) +//' modelSwitch <- c(1, 1, 1, 2, 2, 3) +//' sortedTimes <- popedGetMultipleEndpointModelingTimes(times, modelSwitch) +//' print(sortedTimes) +//' +//' # now show the output of the data frame representing the model +//' # switch to endpoint index +//' +//' popedMultipleEndpointIndexDataFrame() +//' +//' # now show a more complex example with overlaps etc. +//' +//' times <- c(1.1, 1.2, 1.3, 0.5, 2.2, 1.1, 0.75,0.75) +//' modelSwitch <- c(1, 1, 1, 2, 2, 2, 3, 3) +//' sortedTimes <- popedGetMultipleEndpointModelingTimes(times, modelSwitch) +//' print(sortedTimes) +//' +//' popedMultipleEndpointIndexDataFrame() +//' +//' } +// [[Rcpp::export]] +Rcpp::NumericVector popedGetMultipleEndpointModelingTimes(Rcpp::NumericVector times, + Rcpp::IntegerVector modelSwitch) { + std::vector idsVec = Rcpp::as>(modelSwitch); + std::vector timesVec = Rcpp::as>(times); + + std::vector outIds; + std::vector> outTimes; + + convertToTimeIndexerStructure(timesVec, idsVec, outIds, outTimes); + + // this will reset if needed + globalTimeIndexer.initialize(outIds, outTimes); + + return Rcpp::wrap(globalTimeIndexer.getSortedUniqueTimes()); +} + +//' @title Reset the Global Time Indexer for Multiple Endpoint Modeling +//' +//' @description +//' +//' This clears the memory and resets the global time indexer used for +//' multiple endpoint modeling. +//' +//' @return NULL, called for side effects +//' +//' @examples +//' +//' \donttest{ +//' +//' popedMultipleEndpointResetTimeIndex() +//' +//' } +// [[Rcpp::export]] +Rcpp::RObject popedMultipleEndpointResetTimeIndex() { + globalTimeIndexer.reset(); + return R_NilValue; +} + +//' @rdname popedGetMultipleEndpointModelingTimes +//' @export +//[[Rcpp::export]] +Rcpp::List popedMultipleEndpointIndexDataFrame() { + if (!globalTimeIndexer.isInitialized()) { + Rcpp::stop("Time indexer has not been initialized"); + } + Rcpp::NumericVector times = Rcpp::wrap(globalTimeIndexer.getSortedUniqueTimes()); + size_t nId = globalTimeIndexer.getNid(); + Rcpp::List ret(1+nId*2); + ret[0] = times; + // initialize the rest with NA_integer_ + for (size_t i = 0; i < nId*2; ++i) { + Rcpp::IntegerVector cur(times.size()); + std::fill_n(cur.begin(), cur.size(), NA_INTEGER); + ret[i+1] = cur; + } + size_t timei = 0; + for (const auto& time : times) { + const auto& infos= globalTimeIndexer.getTimeInfo(time); + for (const auto& info : infos) { + if (info.id > (int)nId || + info.id <= 0) { + Rcpp::stop("IDs need to be sequential 1, 2, 3, ..., n"); + } + INTEGER(ret[2*(info.id-1)+1])[timei] = info.firstIndex + 1; + INTEGER(ret[2*(info.id-1)+2])[timei] = info.count; + } + timei++; + } + Rcpp::CharacterVector names(nId*2+1); + names[0] = "time"; + for (size_t i = 0; i < nId*2; i+=2) { + names[i+1] = "MS:" + std::to_string(i+1); + names[i+2] = "N:" + std::to_string(i+1); + } + ret.names() = names; + ret.attr("class") = "data.frame"; + ret.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, -times.size()); + return ret; +} + #define max2( a , b ) ( (a) > (b) ? (a) : (b) ) #define isSameTime(xout, xp) (fabs((xout)-(xp)) <= DBL_EPSILON*max2(fabs(xout),fabs(xp))) diff --git a/src/timsort.h b/src/timsort.h new file mode 100644 index 00000000..aad5ce36 --- /dev/null +++ b/src/timsort.h @@ -0,0 +1,811 @@ +/* + * C++ implementation of timsort + * + * ported from Python's and OpenJDK's: + * - http://svn.python.org/projects/python/trunk/Objects/listobject.c + * - http://cr.openjdk.java.net/~martin/webrevs/openjdk7/timsort/raw_files/new/src/share/classes/java/util/TimSort.java + * + * Copyright (c) 2011 Fuji, Goro (gfx) . + * Copyright (c) 2019-2021 Morwenn. + * Copyright (c) 2021 Igor Kushnir . + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef GFX_TIMSORT_HPP +#define GFX_TIMSORT_HPP + +#include +#include +#include +#include +#include + +// Semantic versioning macros + +#define GFX_TIMSORT_VERSION_MAJOR 2 +#define GFX_TIMSORT_VERSION_MINOR 1 +#define GFX_TIMSORT_VERSION_PATCH 0 + +// Diagnostic selection macros + +#if defined(GFX_TIMSORT_ENABLE_ASSERT) || defined(GFX_TIMSORT_ENABLE_AUDIT) +# include +#endif + +#ifdef GFX_TIMSORT_ENABLE_ASSERT +# define GFX_TIMSORT_ASSERT(expr) assert(expr) +#else +# define GFX_TIMSORT_ASSERT(expr) ((void)0) +#endif + +#ifdef GFX_TIMSORT_ENABLE_AUDIT +# define GFX_TIMSORT_AUDIT(expr) assert(expr) +#else +# define GFX_TIMSORT_AUDIT(expr) ((void)0) +#endif + +#ifdef GFX_TIMSORT_ENABLE_LOG +# include +# define GFX_TIMSORT_LOG(expr) (std::clog << "# " << __func__ << ": " << expr << std::endl) +#else +# define GFX_TIMSORT_LOG(expr) ((void)0) +#endif + + +namespace gfx { + +// --------------------------------------- +// Implementation details +// --------------------------------------- + +namespace detail { + +// Equivalent to C++20 std::identity +struct identity { + template + constexpr T&& operator()(T&& value) const noexcept + { + return std::forward(value); + } +}; + +// Merge a predicate and a projection function +template +struct projection_compare { + projection_compare(Compare comp, Projection proj) : compare(comp), projection(proj) { + } + + template + bool operator()(T &&lhs, U &&rhs) { +#ifdef __cpp_lib_invoke + return static_cast(std::invoke(compare, + std::invoke(projection, std::forward(lhs)), + std::invoke(projection, std::forward(rhs)) + )); +#else + return static_cast(compare( + projection(std::forward(lhs)), + projection(std::forward(rhs)) + )); +#endif + } + + Compare compare; + Projection projection; +}; + +template struct run { + typedef typename std::iterator_traits::difference_type diff_t; + + Iterator base; + diff_t len; + + run(Iterator b, diff_t l) : base(b), len(l) { + } +}; + +template class TimSort { + typedef RandomAccessIterator iter_t; + typedef typename std::iterator_traits::value_type value_t; + typedef typename std::iterator_traits::reference ref_t; + typedef typename std::iterator_traits::difference_type diff_t; + + static const int MIN_MERGE = 32; + static const int MIN_GALLOP = 7; + + int minGallop_; // default to MIN_GALLOP + + std::vector tmp_; // temp storage for merges + typedef typename std::vector::iterator tmp_iter_t; + + std::vector > pending_; + + static void binarySort(iter_t const lo, iter_t const hi, iter_t start, Compare compare) { + GFX_TIMSORT_ASSERT(lo <= start); + GFX_TIMSORT_ASSERT(start <= hi); + if (start == lo) { + ++start; + } + for (; start < hi; ++start) { + GFX_TIMSORT_ASSERT(lo <= start); + value_t pivot = std::move(*start); + + iter_t const pos = std::upper_bound(lo, start, pivot, compare); + for (iter_t p = start; p > pos; --p) { + *p = std::move(*(p - 1)); + } + *pos = std::move(pivot); + } + } + + static diff_t countRunAndMakeAscending(iter_t const lo, iter_t const hi, Compare compare) { + GFX_TIMSORT_ASSERT(lo < hi); + + iter_t runHi = lo + 1; + if (runHi == hi) { + return 1; + } + + if (compare(*runHi, *lo)) { // decreasing + do { + ++runHi; + } while (runHi < hi && compare(*runHi, *(runHi - 1))); + std::reverse(lo, runHi); + } else { // non-decreasing + do { + ++runHi; + } while (runHi < hi && !compare(*runHi, *(runHi - 1))); + } + + return runHi - lo; + } + + static diff_t minRunLength(diff_t n) { + GFX_TIMSORT_ASSERT(n >= 0); + + diff_t r = 0; + while (n >= 2 * MIN_MERGE) { + r |= (n & 1); + n >>= 1; + } + return n + r; + } + + TimSort() : minGallop_(MIN_GALLOP) { + } + + // Silence GCC -Winline warning + ~TimSort() {} + + void pushRun(iter_t const runBase, diff_t const runLen) { + pending_.push_back(run(runBase, runLen)); + } + + void mergeCollapse(Compare compare) { + while (pending_.size() > 1) { + diff_t n = pending_.size() - 2; + + if ((n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) || + (n > 1 && pending_[n - 2].len <= pending_[n - 1].len + pending_[n].len)) { + if (pending_[n - 1].len < pending_[n + 1].len) { + --n; + } + mergeAt(n, compare); + } else if (pending_[n].len <= pending_[n + 1].len) { + mergeAt(n, compare); + } else { + break; + } + } + } + + void mergeForceCollapse(Compare compare) { + while (pending_.size() > 1) { + diff_t n = pending_.size() - 2; + + if (n > 0 && pending_[n - 1].len < pending_[n + 1].len) { + --n; + } + mergeAt(n, compare); + } + } + + void mergeAt(diff_t const i, Compare compare) { + diff_t const stackSize = pending_.size(); + GFX_TIMSORT_ASSERT(stackSize >= 2); + GFX_TIMSORT_ASSERT(i >= 0); + GFX_TIMSORT_ASSERT(i == stackSize - 2 || i == stackSize - 3); + + iter_t base1 = pending_[i].base; + diff_t len1 = pending_[i].len; + iter_t base2 = pending_[i + 1].base; + diff_t len2 = pending_[i + 1].len; + + pending_[i].len = len1 + len2; + + if (i == stackSize - 3) { + pending_[i + 1] = pending_[i + 2]; + } + + pending_.pop_back(); + + mergeConsecutiveRuns(base1, len1, base2, len2, std::move(compare)); + } + + void mergeConsecutiveRuns(iter_t base1, diff_t len1, iter_t base2, diff_t len2, Compare compare) { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 0); + GFX_TIMSORT_ASSERT(base1 + len1 == base2); + + diff_t const k = gallopRight(*base2, base1, len1, 0, compare); + GFX_TIMSORT_ASSERT(k >= 0); + + base1 += k; + len1 -= k; + + if (len1 == 0) { + return; + } + + len2 = gallopLeft(*(base1 + (len1 - 1)), base2, len2, len2 - 1, compare); + GFX_TIMSORT_ASSERT(len2 >= 0); + if (len2 == 0) { + return; + } + + if (len1 <= len2) { + mergeLo(base1, len1, base2, len2, compare); + } else { + mergeHi(base1, len1, base2, len2, compare); + } + } + + template + static diff_t gallopLeft(ref_t key, Iter const base, diff_t const len, + diff_t const hint, Compare compare) { + GFX_TIMSORT_ASSERT(len > 0); + GFX_TIMSORT_ASSERT(hint >= 0); + GFX_TIMSORT_ASSERT(hint < len); + + diff_t lastOfs = 0; + diff_t ofs = 1; + + if (compare(*(base + hint), key)) { + diff_t const maxOfs = len - hint; + while (ofs < maxOfs && compare(*(base + (hint + ofs)), key)) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { // int overflow + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + lastOfs += hint; + ofs += hint; + } else { + diff_t const maxOfs = hint + 1; + while (ofs < maxOfs && !compare(*(base + (hint - ofs)), key)) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + diff_t const tmp = lastOfs; + lastOfs = hint - ofs; + ofs = hint - tmp; + } + GFX_TIMSORT_ASSERT(-1 <= lastOfs); + GFX_TIMSORT_ASSERT(lastOfs < ofs); + GFX_TIMSORT_ASSERT(ofs <= len); + + return std::lower_bound(base + (lastOfs + 1), base + ofs, key, compare) - base; + } + + template + static diff_t gallopRight(ref_t key, Iter const base, diff_t const len, + diff_t const hint, Compare compare) { + GFX_TIMSORT_ASSERT(len > 0); + GFX_TIMSORT_ASSERT(hint >= 0); + GFX_TIMSORT_ASSERT(hint < len); + + diff_t ofs = 1; + diff_t lastOfs = 0; + + if (compare(key, *(base + hint))) { + diff_t const maxOfs = hint + 1; + while (ofs < maxOfs && compare(key, *(base + (hint - ofs)))) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + diff_t const tmp = lastOfs; + lastOfs = hint - ofs; + ofs = hint - tmp; + } else { + diff_t const maxOfs = len - hint; + while (ofs < maxOfs && !compare(key, *(base + (hint + ofs)))) { + lastOfs = ofs; + ofs = (ofs << 1) + 1; + + if (ofs <= 0) { // int overflow + ofs = maxOfs; + } + } + if (ofs > maxOfs) { + ofs = maxOfs; + } + + lastOfs += hint; + ofs += hint; + } + GFX_TIMSORT_ASSERT(-1 <= lastOfs); + GFX_TIMSORT_ASSERT(lastOfs < ofs); + GFX_TIMSORT_ASSERT(ofs <= len); + + return std::upper_bound(base + (lastOfs + 1), base + ofs, key, compare) - base; + } + + static void rotateLeft(iter_t first, iter_t last) + { + value_t tmp = std::move(*first); + iter_t last_1 = std::move(first + 1, last, first); + *last_1 = std::move(tmp); + } + + static void rotateRight(iter_t first, iter_t last) + { + iter_t last_1 = last - 1; + value_t tmp = std::move(*last_1); + std::move_backward(first, last_1, last); + *first = std::move(tmp); + } + + + void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2, Compare compare) { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 0); + GFX_TIMSORT_ASSERT(base1 + len1 == base2); + + if (len1 == 1) { + return rotateLeft(base1, base2 + len2); + } + if (len2 == 1) { + return rotateRight(base1, base2 + len2); + } + + copy_to_tmp(base1, len1); + + tmp_iter_t cursor1 = tmp_.begin(); + iter_t cursor2 = base2; + iter_t dest = base1; + + *dest = std::move(*cursor2); + ++cursor2; + ++dest; + --len2; + + int minGallop(minGallop_); + + // outer: + while (true) { + diff_t count1 = 0; + diff_t count2 = 0; + + do { + GFX_TIMSORT_ASSERT(len1 > 1); + GFX_TIMSORT_ASSERT(len2 > 0); + + if (compare(*cursor2, *cursor1)) { + *dest = std::move(*cursor2); + ++cursor2; + ++dest; + ++count2; + count1 = 0; + if (--len2 == 0) { + goto epilogue; + } + } else { + *dest = std::move(*cursor1); + ++cursor1; + ++dest; + ++count1; + count2 = 0; + if (--len1 == 1) { + goto epilogue; + } + } + } while ((count1 | count2) < minGallop); + + do { + GFX_TIMSORT_ASSERT(len1 > 1); + GFX_TIMSORT_ASSERT(len2 > 0); + + count1 = gallopRight(*cursor2, cursor1, len1, 0, compare); + if (count1 != 0) { + std::move_backward(cursor1, cursor1 + count1, dest + count1); + dest += count1; + cursor1 += count1; + len1 -= count1; + + if (len1 <= 1) { + goto epilogue; + } + } + *dest = std::move(*cursor2); + ++cursor2; + ++dest; + if (--len2 == 0) { + goto epilogue; + } + + count2 = gallopLeft(*cursor1, cursor2, len2, 0, compare); + if (count2 != 0) { + std::move(cursor2, cursor2 + count2, dest); + dest += count2; + cursor2 += count2; + len2 -= count2; + if (len2 == 0) { + goto epilogue; + } + } + *dest = std::move(*cursor1); + ++cursor1; + ++dest; + if (--len1 == 1) { + goto epilogue; + } + + --minGallop; + } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP)); + + if (minGallop < 0) { + minGallop = 0; + } + minGallop += 2; + } // end of "outer" loop + + epilogue: // merge what is left from either cursor1 or cursor2 + + minGallop_ = (std::min)(minGallop, 1); + + if (len1 == 1) { + GFX_TIMSORT_ASSERT(len2 > 0); + std::move(cursor2, cursor2 + len2, dest); + *(dest + len2) = std::move(*cursor1); + } else { + GFX_TIMSORT_ASSERT(len1 != 0 && "Comparison function violates its general contract"); + GFX_TIMSORT_ASSERT(len2 == 0); + GFX_TIMSORT_ASSERT(len1 > 1); + std::move(cursor1, cursor1 + len1, dest); + } + } + + void mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2, Compare compare) { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 0); + GFX_TIMSORT_ASSERT(base1 + len1 == base2); + + if (len1 == 1) { + return rotateLeft(base1, base2 + len2); + } + if (len2 == 1) { + return rotateRight(base1, base2 + len2); + } + + copy_to_tmp(base2, len2); + + iter_t cursor1 = base1 + len1; + tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1); + iter_t dest = base2 + (len2 - 1); + + *dest = std::move(*(--cursor1)); + --dest; + --len1; + + int minGallop(minGallop_); + + // outer: + while (true) { + diff_t count1 = 0; + diff_t count2 = 0; + + // The next loop is a hot path of the algorithm, so we decrement + // eagerly the cursor so that it always points directly to the value + // to compare, but we have to implement some trickier logic to make + // sure that it points to the next value again by the end of said loop + --cursor1; + + do { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 1); + + if (compare(*cursor2, *cursor1)) { + *dest = std::move(*cursor1); + --dest; + ++count1; + count2 = 0; + if (--len1 == 0) { + goto epilogue; + } + --cursor1; + } else { + *dest = std::move(*cursor2); + --cursor2; + --dest; + ++count2; + count1 = 0; + if (--len2 == 1) { + ++cursor1; // See comment before the loop + goto epilogue; + } + } + } while ((count1 | count2) < minGallop); + ++cursor1; // See comment before the loop + + do { + GFX_TIMSORT_ASSERT(len1 > 0); + GFX_TIMSORT_ASSERT(len2 > 1); + + count1 = len1 - gallopRight(*cursor2, base1, len1, len1 - 1, compare); + if (count1 != 0) { + dest -= count1; + cursor1 -= count1; + len1 -= count1; + std::move_backward(cursor1, cursor1 + count1, dest + (1 + count1)); + + if (len1 == 0) { + goto epilogue; + } + } + *dest = std::move(*cursor2); + --cursor2; + --dest; + if (--len2 == 1) { + goto epilogue; + } + + count2 = len2 - gallopLeft(*(cursor1 - 1), tmp_.begin(), len2, len2 - 1, compare); + if (count2 != 0) { + dest -= count2; + cursor2 -= count2; + len2 -= count2; + std::move(cursor2 + 1, cursor2 + (1 + count2), dest + 1); + if (len2 <= 1) { + goto epilogue; + } + } + *dest = std::move(*(--cursor1)); + --dest; + if (--len1 == 0) { + goto epilogue; + } + + --minGallop; + } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP)); + + if (minGallop < 0) { + minGallop = 0; + } + minGallop += 2; + } // end of "outer" loop + + epilogue: // merge what is left from either cursor1 or cursor2 + + minGallop_ = (std::min)(minGallop, 1); + + if (len2 == 1) { + GFX_TIMSORT_ASSERT(len1 > 0); + dest -= len1; + std::move_backward(cursor1 - len1, cursor1, dest + (1 + len1)); + *dest = std::move(*cursor2); + } else { + GFX_TIMSORT_ASSERT(len2 != 0 && "Comparison function violates its general contract"); + GFX_TIMSORT_ASSERT(len1 == 0); + GFX_TIMSORT_ASSERT(len2 > 1); + std::move(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1)); + } + } + + void copy_to_tmp(iter_t const begin, diff_t len) { + tmp_.assign(std::make_move_iterator(begin), + std::make_move_iterator(begin + len)); + } + +public: + + static void merge(iter_t const lo, iter_t const mid, iter_t const hi, Compare compare) { + GFX_TIMSORT_ASSERT(lo <= mid); + GFX_TIMSORT_ASSERT(mid <= hi); + + if (lo == mid || mid == hi) { + return; // nothing to do + } + + TimSort ts; + ts.mergeConsecutiveRuns(lo, mid - lo, mid, hi - mid, std::move(compare)); + + GFX_TIMSORT_LOG("1st size: " << (mid - lo) << "; 2nd size: " << (hi - mid) + << "; tmp_.size(): " << ts.tmp_.size()); + } + + static void sort(iter_t const lo, iter_t const hi, Compare compare) { + GFX_TIMSORT_ASSERT(lo <= hi); + + diff_t nRemaining = (hi - lo); + if (nRemaining < 2) { + return; // nothing to do + } + + if (nRemaining < MIN_MERGE) { + diff_t const initRunLen = countRunAndMakeAscending(lo, hi, compare); + GFX_TIMSORT_LOG("initRunLen: " << initRunLen); + binarySort(lo, hi, lo + initRunLen, compare); + return; + } + + TimSort ts; + diff_t const minRun = minRunLength(nRemaining); + iter_t cur = lo; + do { + diff_t runLen = countRunAndMakeAscending(cur, hi, compare); + + if (runLen < minRun) { + diff_t const force = (std::min)(nRemaining, minRun); + binarySort(cur, cur + force, cur + runLen, compare); + runLen = force; + } + + ts.pushRun(cur, runLen); + ts.mergeCollapse(compare); + + cur += runLen; + nRemaining -= runLen; + } while (nRemaining != 0); + + GFX_TIMSORT_ASSERT(cur == hi); + ts.mergeForceCollapse(compare); + GFX_TIMSORT_ASSERT(ts.pending_.size() == 1); + + GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size() + << " pending_.size(): " << ts.pending_.size()); + } +}; + +} // namespace detail + + +// --------------------------------------- +// Public interface implementation +// --------------------------------------- + +/** + * Stably merges two consecutive sorted ranges [first, middle) and [middle, last) into one + * sorted range [first, last) with a comparison function and a projection function. + */ +template +void timmerge(RandomAccessIterator first, RandomAccessIterator middle, + RandomAccessIterator last, Compare compare, Projection projection) { + typedef detail::projection_compare compare_t; + compare_t comp(std::move(compare), std::move(projection)); + GFX_TIMSORT_AUDIT(std::is_sorted(first, middle, comp) && "Precondition"); + GFX_TIMSORT_AUDIT(std::is_sorted(middle, last, comp) && "Precondition"); + detail::TimSort::merge(first, middle, last, comp); + GFX_TIMSORT_AUDIT(std::is_sorted(first, last, comp) && "Postcondition"); +} + +/** + * Same as std::inplace_merge(first, middle, last, compare). + */ +template +void timmerge(RandomAccessIterator first, RandomAccessIterator middle, + RandomAccessIterator last, Compare compare) { + gfx::timmerge(first, middle, last, compare, detail::identity()); +} + +/** + * Same as std::inplace_merge(first, middle, last). + */ +template +void timmerge(RandomAccessIterator first, RandomAccessIterator middle, + RandomAccessIterator last) { + typedef typename std::iterator_traits::value_type value_type; + gfx::timmerge(first, middle, last, std::less(), detail::identity()); +} + +/** + * Stably sorts a range with a comparison function and a projection function. + */ +template +void timsort(RandomAccessIterator const first, RandomAccessIterator const last, + Compare compare, Projection projection) { + typedef detail::projection_compare compare_t; + compare_t comp(std::move(compare), std::move(projection)); + detail::TimSort::sort(first, last, comp); + GFX_TIMSORT_AUDIT(std::is_sorted(first, last, comp) && "Postcondition"); +} + +/** + * Same as std::stable_sort(first, last, compare). + */ +template +void timsort(RandomAccessIterator const first, RandomAccessIterator const last, Compare compare) { + gfx::timsort(first, last, compare, detail::identity()); +} + +/** + * Same as std::stable_sort(first, last). + */ +template +void timsort(RandomAccessIterator const first, RandomAccessIterator const last) { + typedef typename std::iterator_traits::value_type value_type; + gfx::timsort(first, last, std::less(), detail::identity()); +} + +/** + * Stably sorts a range with a comparison function and a projection function. + */ +template +void timsort(RandomAccessRange &range, Compare compare, Projection projection) { + gfx::timsort(std::begin(range), std::end(range), compare, projection); +} + +/** + * Same as std::stable_sort(std::begin(range), std::end(range), compare). + */ +template +void timsort(RandomAccessRange &range, Compare compare) { + gfx::timsort(std::begin(range), std::end(range), compare); +} + +/** + * Same as std::stable_sort(std::begin(range), std::end(range)). + */ +template +void timsort(RandomAccessRange &range) { + gfx::timsort(std::begin(range), std::end(range)); +} + +} // namespace gfx + +#undef GFX_TIMSORT_ENABLE_ASSERT +#undef GFX_TIMSORT_ASSERT +#undef GFX_TIMSORT_ENABLE_AUDIT +#undef GFX_TIMSORT_AUDIT +#undef GFX_TIMSORT_ENABLE_LOG +#undef GFX_TIMSORT_LOG + +#endif // GFX_TIMSORT_HPP